Changeset 388


Ignore:
Timestamp:
05/14/08 17:51:58 (4 years ago)
Author:
kasper
Message:

Ticket #72: Added drill-to-detail features to "Pattern finder" profile. Fixed.

Location:
datacleaner/DataCleaner-core/trunk/src
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • datacleaner/DataCleaner-core/trunk/src/main/java/dk/eobjects/datacleaner/profiler/pattern/PatternFinderProfile.java

    r386 r388  
    2626import dk.eobjects.datacleaner.profiler.IMatrix; 
    2727import dk.eobjects.datacleaner.profiler.MatrixBuilder; 
     28import dk.eobjects.datacleaner.profiler.MatrixValue; 
     29import dk.eobjects.metamodel.data.IRowFilter; 
    2830import dk.eobjects.metamodel.data.Row; 
    2931import dk.eobjects.metamodel.schema.Column; 
     
    4143                        _patternRecognizers.put(column, patternRecognizer); 
    4244                } 
    43                 for (int i = 0; i < valueCount; i++) { 
    44                         if (value != null) { 
    45                                 patternRecognizer.addInstance(value.toString()); 
    46                         } 
     45                if (value != null) { 
     46                        patternRecognizer.addInstance(value.toString(), valueCount); 
    4747                } 
    4848        } 
     
    5252                List<IMatrix> result = new ArrayList<IMatrix>(); 
    5353 
    54                 for (Column column : _columns) { 
     54                for (final Column column : _columns) { 
    5555                        MatrixBuilder mb = new MatrixBuilder(); 
    5656                        mb.addColumn(column.getName()); 
    5757 
    58                         PatternRecognizer patternRecognizer = _patternRecognizers 
     58                        final PatternRecognizer patternRecognizer = _patternRecognizers 
    5959                                        .get(column); 
    60                         Map<String, Integer> patterns = patternRecognizer 
    61                                         .identifyPatterns(); 
     60                        Map<String, Long> patterns = patternRecognizer.identifyPatterns(); 
    6261                        Set<String> keys = patterns.keySet(); 
    63                         for (String patternName : keys) { 
    64                                 Integer patternCount = patterns.get(patternName); 
    65                                 mb.addRow(patternName, patternCount); 
     62                        for (final String patternName : keys) { 
     63                                Long patternCount = patterns.get(patternName); 
     64                                MatrixValue[] matrixValues = mb.addRow(patternName, 
     65                                                patternCount); 
     66                                MatrixValue mv = matrixValues[0]; 
     67                                mv.setDetailSource(getBaseQuery(column)); 
     68                                mv.addDetailRowFilter(new IRowFilter() { 
     69 
     70                                        public boolean accept(Row row) { 
     71                                                Object value = row.getValue(column); 
     72                                                if (value != null) { 
     73                                                        return patternRecognizer.patternEquals(patternName, 
     74                                                                        value.toString()); 
     75                                                } 
     76                                                return false; 
     77                                        } 
     78 
     79                                }); 
    6680                        } 
    6781                        if (!mb.isEmpty()) { 
  • datacleaner/DataCleaner-core/trunk/src/main/java/dk/eobjects/datacleaner/profiler/pattern/PatternRecognizer.java

    r281 r388  
    2828        private Map<String, PatternDefinition> _patternMap = new TreeMap<String, PatternDefinition>(); 
    2929 
    30         public void addInstance(String string) { 
     30        public void addInstance(String string, long count) { 
    3131                Token[] tokens = _tokenizer.tokenize(string); 
     32                String patternMapKey = toPattern(tokens); 
     33 
     34                PatternDefinition patternDefinition = _patternMap.get(patternMapKey); 
     35                if (patternDefinition == null) { 
     36                        patternDefinition = new PatternDefinition(); 
     37                        _patternMap.put(patternMapKey, patternDefinition); 
     38                } 
     39                patternDefinition.addInstanceData(tokens, count); 
     40        } 
     41 
     42        /** 
     43         * Creates a unique string representing this pattern's token-composition 
     44         */ 
     45        private static String toPattern(Token[] tokens) { 
    3246                StringBuilder sb = new StringBuilder(); 
    3347                for (int i = 0; i < tokens.length; i++) { 
     
    4054                        } 
    4155                } 
    42  
    43                 String patternMapKey = sb.toString(); 
    44                 sb = null; 
    45  
    46                 PatternDefinition patternDefinition = _patternMap.get(patternMapKey); 
    47                 if (patternDefinition == null) { 
    48                         patternDefinition = new PatternDefinition(); 
    49                         _patternMap.put(patternMapKey, patternDefinition); 
    50                 } 
    51                 patternMapKey = null; 
    52                 patternDefinition.addInstanceData(tokens); 
     56                return sb.toString(); 
    5357        } 
    5458 
    55         public Map<String, Integer> identifyPatterns() { 
    56                 Map<String, Integer> result = new LinkedHashMap<String, Integer>(); 
     59        public Map<String, Long> identifyPatterns() { 
     60                Map<String, Long> result = new LinkedHashMap<String, Long>(); 
    5761 
    5862                Collection<PatternDefinition> values = _patternMap.values(); 
     
    6468        } 
    6569 
     70        /** 
     71         * @param patternName 
     72         * @param value 
     73         * @return true if the value is among the instances that produced the 
     74         *         patternName 
     75         */ 
     76        public boolean patternEquals(String patternName, String value) { 
     77                Token[] tokens = _tokenizer.tokenize(value); 
     78                String patternMapKey = toPattern(tokens); 
     79                PatternDefinition patternDefinition = _patternMap.get(patternMapKey); 
     80                if (patternDefinition != null) { 
     81                        return patternDefinition.toString().equals(patternName); 
     82                } 
     83                return false; 
     84        } 
     85 
     86        /** 
     87         * Convenience class that holds string representations of each token in a 
     88         * pattern like "aaa" instead of "foo" or "999" instead of "432". Each 
     89         * string-representation will grow in size if new token instances appear, so 
     90         * the tokens "foo" and "foobar" will yield a "aaaaaa" string 
     91         * representation. 
     92         *  
     93         * Furthermore the pattern definition holds a simple counter to increment on 
     94         * each added observation 
     95         */ 
    6696        private class PatternDefinition { 
    6797 
    68                 private int _count = 0; 
     98                private long _count = 0l; 
    6999                private String[] _symbols; 
    70100 
    71                 public void addInstanceData(Token[] tokens) { 
     101                public void addInstanceData(Token[] tokens, long count) { 
    72102                        if (_symbols == null) { 
    73103                                _symbols = new String[tokens.length]; 
     
    75105                        } 
    76106 
    77                         _count++; 
     107                        _count += count; 
    78108                        for (int i = 0; i < tokens.length; i++) { 
    79109                                if (tokens[i].getLength() > _symbols[i].length()) { 
     
    113143                } 
    114144 
    115                 public int getCount() { 
     145                public long getCount() { 
    116146                        return _count; 
    117147                } 
  • datacleaner/DataCleaner-core/trunk/src/test/java/dk/eobjects/datacleaner/execution/ProfileRunnerTest.java

    r384 r388  
    105105                String[] expectations = { 
    106106                                "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Standard measures,profileClass=class dk.eobjects.datacleaner.profiler.trivial.StandardMeasuresProfile],matrices={Matrix[columnNames={POSTALCODE,OFFICECODE},Row count={7,7},Null values={0,0},Empty values={0,0},Highest value={NSW 2010,7},Lowest value={02107,1}]}]", 
    107                                 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={ADDRESSLINE2},aaaaa 999={11},??? aaaaa={1},aaaaa aa. 9={1}]}]" }; 
     107                                "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={ADDRESSLINE2},aaaaa 999={MatrixValue[value=11,detailQuery=SELECT CUSTOMERS.ADDRESSLINE2, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.ADDRESSLINE2]},??? aaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.ADDRESSLINE2, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.ADDRESSLINE2]},aaaaa aa. 9={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.ADDRESSLINE2, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.ADDRESSLINE2]}]}]" }; 
    108108 
    109109                assertEquals(expectations.length, results.size()); 
     
    179179                String[] expectations = { 
    180180                                "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Standard measures,profileClass=class dk.eobjects.datacleaner.profiler.trivial.StandardMeasuresProfile],matrices={Matrix[columnNames={CUSTOMERNAME},Row count={122},Null values={0},Empty values={0},Highest value={giftsbymail.co.uk},Lowest value={ANG Resellers}]}]", 
    181                                 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={COUNTRY},aaaaaaaaaaa={116},aaaaa aaaaaaa={6}],Matrix[columnNames={CUSTOMERNAME},aaaaaaaaaa aaaaaaaaaaaa={22},aaaaaaaaaaaa aaaaaaaaaaa aaaaaaaaaaaa={15},aaaaaaaaa aaaaaaaaaaaa aaaa.={13},aaaaaaaaaa aaaaaaaaaaaaa, aaa={9},aaaaaaaaaaaa aaaaaaa aaaaaaaaaaaa aaa.={9},aaaaaaaaaa aaaaaaaaaaaa, aaa.={8},aaaaaaaa aaaaaaaaaaaa aaaaaaaaaaaa, aaaa.={8},aaaaaaaaaa aaaaaaaa aaaaaaaaaaa, aaa={8},aaaaaaaa aaaaa aaaaaaaa aaaaaaaaa={3},aaaaa aaaaaaaaa & aaa.={3},?????????????.aaa={2},aaaaaaaaaaaa.aaa={2},aaaaaaaaaa aaa.={2},aaaaaa & aaaa aa.={2},aaaaaaaaaaa.aa.aa={1},aaaa-aaaa aaaaaaaa aaa.={1},aaaa+ aaaaaaaa aaaaaaa={1},aaaa'a aaaaaaaaaaa, aaa={1},aaaaa'a aaaaaaaa aa.={1},aaaaa'a aaaa aaaa={1},a'aaaaaa aaaaaaaaaa={1},aa&a aaaaaaaaaaaa={1},aaaa aaaaa+ aaaaa={1},aaaaaa aaaaa& aa={1},aa aaaaa a'aaaaaaaaa, aa.={1},aaaaaa aaaaa aa aaaa, aa.={1},aaaaaa aaaaaa aaaa aaaaaa, aaa={1},aaa 'a' aa aaaaaaaaa, aaa.={1},aaaaaaa & aaaaaaa, aa.={1},aaaaa & aaaaaaa aa={1}]}]" }; 
     181                                "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={COUNTRY},aaaaaaaaaaa={MatrixValue[value=116,detailQuery=SELECT CUSTOMERS.COUNTRY, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.COUNTRY]},aaaaa aaaaaaa={MatrixValue[value=6,detailQuery=SELECT CUSTOMERS.COUNTRY, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.COUNTRY]}],Matrix[columnNames={CUSTOMERNAME},aaaaaaaaaa aaaaaaaaaaaa={MatrixValue[value=22,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaaaa aaaaaaaaaaa aaaaaaaaaaaa={MatrixValue[value=15,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaa aaaaaaaaaaaa aaaa.={MatrixValue[value=13,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaa aaaaaaaaaaaaa, aaa={MatrixValue[value=9,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaaaa aaaaaaa aaaaaaaaaaaa aaa.={MatrixValue[value=9,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaa aaaaaaaaaaaa, aaa.={MatrixValue[value=8,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaa aaaaaaaaaaaa aaaaaaaaaaaa, aaaa.={MatrixValue[value=8,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaa aaaaaaaa aaaaaaaaaaa, aaa={MatrixValue[value=8,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaa aaaaa aaaaaaaa aaaaaaaaa={MatrixValue[value=3,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaa aaaaaaaaa & aaa.={MatrixValue[value=3,detailQuery=SELECT 
CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},?????????????.aaa={MatrixValue[value=2,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaaaa.aaa={MatrixValue[value=2,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaa aaa.={MatrixValue[value=2,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaa & aaaa aa.={MatrixValue[value=2,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaaa.aa.aa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaa-aaaa aaaaaaaa aaa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaa+ aaaaaaaa aaaaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaa'a aaaaaaaaaaa, aaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaa'a aaaaaaaa aa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaa'a aaaa aaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},a'aaaaaa aaaaaaaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aa&a aaaaaaaaaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaa aaaaa+ aaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaa aaaaa& aa={MatrixValue[value=1,detailQuery=SELECT 
CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aa aaaaa a'aaaaaaaaa, aa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaa aaaaa aa aaaa, aa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaa aaaaaa aaaa aaaaaa, aaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaa 'a' aa aaaaaaaaa, aaa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaa & aaaaaaa, aa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaa & aaaaaaa aa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]}]}]" }; 
    182182 
    183183                assertEquals(2, results.size()); 
     
    215215 
    216216                String[] expectations = { 
    217                                 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={EMPLOYEENUMBER},9999={23}]}]", 
    218                                 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={CUSTOMERNUMBER},999={122}]}]", 
     217                                "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={EMPLOYEENUMBER},9999={MatrixValue[value=23,detailQuery=SELECT EMPLOYEES.EMPLOYEENUMBER, COUNT(*) FROM EMPLOYEES GROUP BY EMPLOYEES.EMPLOYEENUMBER]}]}]", 
     218                                "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={CUSTOMERNUMBER},999={MatrixValue[value=122,detailQuery=SELECT CUSTOMERS.CUSTOMERNUMBER, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNUMBER]}]}]", 
    219219                                "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Standard measures,profileClass=class dk.eobjects.datacleaner.profiler.trivial.StandardMeasuresProfile],matrices={Matrix[columnNames={ADDRESSLINE1,ADDRESSLINE2},Row count={122,122},Null values={0,MatrixValue[value=109,detailQuery=SELECT CUSTOMERS.ADDRESSLINE1, CUSTOMERS.ADDRESSLINE2 FROM CUSTOMERS WHERE CUSTOMERS.ADDRESSLINE2 IS NULL]},Empty values={0,0},Highest value={Åkergatan 24,Suite 750},Lowest value={1 rue Alsace-Lorraine,2nd Floor}]}]" }; 
  • datacleaner/DataCleaner-core/trunk/src/test/java/dk/eobjects/datacleaner/profiler/pattern/PatternFinderProfileTest.java

    r372 r388  
    5757 
    5858                assertEquals( 
    59                                 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={PRODUCTLINE},aaaaaaa aaaa={62},aaaaaaaaaaa={37},aaaaaa aaa aaaaa={11}]}]", 
     59                                "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={PRODUCTLINE},aaaaaaa aaaa={MatrixValue[value=62,detailQuery=SELECT PRODUCTS.PRODUCTLINE, COUNT(*) FROM PRODUCTS GROUP BY PRODUCTS.PRODUCTLINE]},aaaaaaaaaaa={MatrixValue[value=37,detailQuery=SELECT PRODUCTS.PRODUCTLINE, COUNT(*) FROM PRODUCTS GROUP BY PRODUCTS.PRODUCTLINE]},aaaaaa aaa aaaaa={MatrixValue[value=11,detailQuery=SELECT PRODUCTS.PRODUCTLINE, COUNT(*) FROM PRODUCTS GROUP BY PRODUCTS.PRODUCTLINE]}]}]", 
    6060                                result.toString()); 
    6161        } 
  • datacleaner/DataCleaner-core/trunk/src/test/java/dk/eobjects/datacleaner/profiler/pattern/PatternRecognizerTest.java

    r281 r388  
    2727        public void testIdentifyAddressPatterns() throws Exception { 
    2828                PatternRecognizer patternRecognizer = new PatternRecognizer(); 
    29                 patternRecognizer.addInstance("Osterbrogade 3, DK2100 Koebenhavn O"); 
    30                 patternRecognizer.addInstance("Noerrebrogade 214, DK2200 Koebenhavn N"); 
    31                 patternRecognizer.addInstance("Osterbrogade 4, 2100 Koebenhavn O"); 
    32                 patternRecognizer.addInstance("Noerrebrogade 215, 2200 Koebenhavn N"); 
    33                 patternRecognizer.addInstance("Byvej 2, 2. th, 2200 Koebenhavn N"); 
     29                patternRecognizer 
     30                                .addInstance("Osterbrogade 3, DK2100 Koebenhavn O", 1l); 
     31                patternRecognizer.addInstance("Noerrebrogade 214, DK2200 Koebenhavn N", 
     32                                1l); 
     33                patternRecognizer.addInstance("Osterbrogade 4, 2100 Koebenhavn O", 1l); 
     34                patternRecognizer.addInstance("Noerrebrogade 215, 2200 Koebenhavn N", 
     35                                1l); 
     36                patternRecognizer.addInstance("Byvej 2, 2. th, 2200 Koebenhavn N", 1l); 
    3437 
    35                 Map<String, Integer> patternMap = patternRecognizer.identifyPatterns(); 
     38                Map<String, Long> patternMap = patternRecognizer.identifyPatterns(); 
    3639 
    3740                assertEquals(3, patternMap.size()); 
    3841 
    39                 Integer patternCount = patternMap 
     42                Long patternCount = patternMap 
    4043                                .get("aaaaaaaaaaaaa 999, 9999 aaaaaaaaaa a"); 
    4144                assertEquals(2, patternCount.intValue()); 
     
    5053        public void testIdentifyNamePatterns() throws Exception { 
    5154                PatternRecognizer patternRecognizer = new PatternRecognizer(); 
    52                 patternRecognizer.addInstance("Kasper Soerensen"); 
    53                 patternRecognizer.addInstance("Mr. Kasper Soerensen"); 
    54                 patternRecognizer.addInstance("Soerensen, Kasper"); 
    55                 patternRecognizer.addInstance("Mr Kasper Soerensen"); 
    56                 patternRecognizer.addInstance("Jesper Lind"); 
     55                patternRecognizer.addInstance("Kasper Soerensen", 1l); 
     56                patternRecognizer.addInstance("Mr. Kasper Soerensen", 1l); 
     57                patternRecognizer.addInstance("Soerensen, Kasper", 1l); 
     58                patternRecognizer.addInstance("Mr Kasper Soerensen", 1l); 
     59                patternRecognizer.addInstance("Jesper Lind", 1l); 
    5760 
    58                 Map<String, Integer> patternMap = patternRecognizer.identifyPatterns(); 
     61                Map<String, Long> patternMap = patternRecognizer.identifyPatterns(); 
    5962                assertEquals(4, patternMap.size()); 
    6063 
    61                 Integer patternCount = patternMap.get("aa. aaaaaa aaaaaaaaa"); 
     64                Long patternCount = patternMap.get("aa. aaaaaa aaaaaaaaa"); 
    6265                assertEquals(1, patternCount.intValue()); 
    6366 
     
    7477        public void testSingleCharacter() throws Exception { 
    7578                PatternRecognizer patternRecognizer = new PatternRecognizer(); 
    76                 patternRecognizer.addInstance("a"); 
    77                 patternRecognizer.addInstance("b"); 
    78                 Map<String, Integer> patterns = patternRecognizer.identifyPatterns(); 
     79                patternRecognizer.addInstance("a", 1l); 
     80                patternRecognizer.addInstance("b", 1l); 
     81                Map<String, Long> patterns = patternRecognizer.identifyPatterns(); 
    7982                assertEquals(1, patterns.size()); 
    8083                assertEquals(2, patterns.get("a").intValue()); 
     
    8386        public void testNumberInput() throws Exception { 
    8487                PatternRecognizer patternRecognizer = new PatternRecognizer(); 
    85                 patternRecognizer.addInstance("124"); 
    86                 patternRecognizer.addInstance("4"); 
    87                 patternRecognizer.addInstance("4324"); 
    88                 patternRecognizer.addInstance("543"); 
    89                 patternRecognizer.addInstance("2"); 
    90                 patternRecognizer.addInstance("31"); 
    91                 patternRecognizer.addInstance("943242872"); 
     88                patternRecognizer.addInstance("124", 1l); 
     89                patternRecognizer.addInstance("4", 1l); 
     90                patternRecognizer.addInstance("4324", 1l); 
     91                patternRecognizer.addInstance("543", 1l); 
     92                patternRecognizer.addInstance("2", 1l); 
     93                patternRecognizer.addInstance("31", 1l); 
     94                patternRecognizer.addInstance("943242872", 1l); 
    9295 
    93                 Map<String, Integer> patternMap = patternRecognizer.identifyPatterns(); 
     96                Map<String, Long> patternMap = patternRecognizer.identifyPatterns(); 
    9497                assertEquals(1, patternMap.size()); 
    9598                assertEquals(7, patternMap.get("999999999").intValue()); 
     
    98101        public void testToStringCompliance() throws Exception { 
    99102                PatternRecognizer patternRecognizer = new PatternRecognizer(); 
    100                 patternRecognizer.addInstance("Kasper Soerensen"); 
    101                 patternRecognizer.addInstance("Mr. Kasper Soerensen"); 
    102                 patternRecognizer.addInstance("Soerensen, Kasper"); 
    103                 patternRecognizer.addInstance("Mr Kasper Soerensen"); 
    104                 patternRecognizer.addInstance("Jesper Lind"); 
     103                patternRecognizer.addInstance("Kasper Soerensen", 1l); 
     104                patternRecognizer.addInstance("Mr. Kasper Soerensen", 1l); 
     105                patternRecognizer.addInstance("Soerensen, Kasper", 1l); 
     106                patternRecognizer.addInstance("Mr Kasper Soerensen", 1l); 
     107                patternRecognizer.addInstance("Jesper Lind", 1l); 
    105108 
    106                 Map<String, Integer> patternMap = patternRecognizer.identifyPatterns(); 
     109                Map<String, Long> patternMap = patternRecognizer.identifyPatterns(); 
    107110                assertEquals( 
    108111                                "{aaaaaa aaaaaaaaa=2, aa aaaaaa aaaaaaaaa=1, aaaaaaaaa, aaaaaa=1, aa. aaaaaa aaaaaaaaa=1}", 
    109112                                patternMap.toString()); 
    110113        } 
     114 
     115        public void testPatternEquals() throws Exception { 
     116                PatternRecognizer patternRecognizer = new PatternRecognizer(); 
     117                patternRecognizer.addInstance("Kasper Soerensen", 1l); 
     118                patternRecognizer.addInstance("Asbjoern Leeth", 1l); 
     119                Map<String, Long> patterns = patternRecognizer.identifyPatterns(); 
     120                assertEquals("{aaaaaaaa aaaaaaaaa=2}", patterns.toString()); 
     121 
     122                assertTrue(patternRecognizer.patternEquals("aaaaaaaa aaaaaaaaa", 
     123                                "Kasp Soeren")); 
     124                assertFalse(patternRecognizer.patternEquals("aaaaaaaa aaaaaaaaa", 
     125                                "Kasp Something-with-mixed")); 
     126                assertFalse(patternRecognizer.patternEquals("aaaaaaaa aaaaaaaaa", 
     127                                "Kasp er Soerensen")); 
     128        } 
    111129} 
Note: See TracChangeset for help on using the changeset viewer.