Bladeren bron

last changes

ailton 7 jaren geleden
bovenliggende
commit
7fd1f3ad02

+ 6 - 0
pom.xml

@@ -24,6 +24,12 @@
24 24
   	<artifactId>gs-core</artifactId>
25 25
   	<version>1.2</version>
26 26
   </dependency>
27
+  <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
28
+ <dependency>
29
+    <groupId>org.apache.commons</groupId>
30
+    <artifactId>commons-lang3</artifactId>
31
+    <version>3.4</version>
32
+  </dependency>
27 33
   <dependency>
28 34
   	<groupId>org.jgrapht</groupId>
29 35
   	<artifactId>jgrapht-core</artifactId>

+ 82 - 42
src/graph/extractor/graph/builder/BuilderFactory.java

@@ -1,17 +1,11 @@
1 1
 package graph.extractor.graph.builder;
2 2
 
3 3
 import java.io.File;
4
-import java.io.IOException;
5
-import java.nio.file.Files;
6 4
 import java.util.ArrayList;
7 5
 import java.util.List;
8
-import java.util.Random;
9
-
10
-import org.apache.lucene.index.IndexWriter;
11
-
12
-import dao.connection.LuceneConnector;
6
+import java.util.Random;	
13 7
 import dao.entity.DOProcess;
14
-import dao.lucene.tables.DAOJudge;
8
+import dao.lucene.cache.Cache;
15 9
 import graph.input.file.InputFactory;
16 10
 import graph.output.OutPutFactory;
17 11
 
@@ -19,7 +13,9 @@ public abstract class BuilderFactory {
19 13
 
20 14
 	protected List<DOProcess> processes;
21 15
 	protected InputFactory input;
22
-
16
+	public static long processIndex = 1;
17
+	
18
+	
23 19
 	public BuilderFactory(){
24 20
 		this.processes = new ArrayList<DOProcess>();
25 21
 
@@ -37,81 +33,125 @@ public abstract class BuilderFactory {
37 33
 		long originalSize = allDateFiles.size();
38 34
 		
39 35
 		
36
+
40 37
 		while(allDateFiles.size() > 0)
41 38
 		{
42 39
 			try{
43 40
 				Random rand = new Random();
44
-				
41
+
45 42
 				int randNumber = Math.abs(rand.nextInt(allDateFiles.size()));
46 43
 				File oneDayOfDO = allDateFiles.get(randNumber);
47
-				
44
+
48 45
 				List<String> allProcess  = input.getAllProcess(oneDayOfDO);
49
-				
46
+
50 47
 				System.out.println("Processando dia "+oneDayOfDO.getName() + "---> " + allProcess.size() +" processos");
51 48
 				System.out.println("Processados " +(originalSize - allDateFiles.size())+" de " + originalSize+" diarios");
49
+
50
+
51
+
52 52
 				for(String oneProcess: allProcess)
53 53
 				{
54 54
 					DOProcess process = this.extractProcess(oneProcess);
55 55
 					if(process != null)
56 56
 					{
57 57
 						process.setProcessDate(oneDayOfDO.getName());
58
+						process.setName(process.getProcessNumber() + oneDayOfDO.getName());
58 59
 						this.processes.add(process);
59 60
 					}
60
-					
61
-					
61
+
62 62
 				}
63
+
64
+
63 65
 				output.buildOutput(this.processes);
64
-				
66
+
67
+
68
+
65 69
 				//this.moveDoneFiles(oneDayOfDO);
66 70
 				this.processes.clear();
67 71
 				allProcess.clear();
68 72
 				allDateFiles.remove(oneDayOfDO);
69 73
 				
74
+				this.print();
75
+				this.store();
76
+				this.clear();
77
+				this.restart();
78
+				System.gc();
79
+
70 80
 			}catch(Exception e)
71 81
 			{
72 82
 				e.printStackTrace();
73 83
 			}
74
-			
84
+
75 85
 			System.out.println( "Completado " +(originalSize - allDateFiles.size())+" de " + originalSize +". Faltam "+ allDateFiles.size() +" Processos");
86
+	
87
+
88
+
89
+
76 90
 			System.out.println("\n\n\n\n");
77
-			
78 91
 
79 92
 		}
80
-		DAOJudge daoJudge = DAOJudge.getInstance();
81
-		daoJudge.store();
82
-		try {
83
-			LuceneConnector.getIndexDisk().commit();
84
-		} catch (IOException e) {
85
-			// TODO Auto-generated catch block
86
-			e.printStackTrace();
87
-		}
88
-		
89
-		
90 93
 
94
+		//		afterInsertAll();
91 95
 
92
-	}
96
+	
93 97
 
94
-	private void moveDoneFiles(File oneDayOfDO) {
95
-		
96
-		try{
97
-			
98
+
99
+
100
+
101
+
102
+
103
+
104
+	}
105
+	
106
+	private void restart() { // last cache step of the per-file loop: invoked right after clear()
107
+		Cache.getCache(); // return value is discarded; presumably called so a cache instance exists again after Cache.delete() - TODO confirm against Cache
98 108
 		
99
-		List<File> files = input.getFilesFromDate(oneDayOfDO);
109
+	}
110
+
111
+	private void clear() {
112
+		// Delegates to the static Cache.delete(); the per-file loop calls this after store() and before restart().
113
+		Cache.delete();
100 114
 		
101 115
 		
102 116
 		
103
-		for(File file: files)
104
-		{
105
-			File doneFolder = new File("/home/ailton/Monografia/done/"+file.getName());
106
-			Files.move(file.toPath(), doneFolder.toPath());
107
-			
108
-		}}catch(Exception e)
109
-		{
110
-			e.printStackTrace();
111
-		}
117
+	}
118
+
119
+	public void print() // first cache step of the per-file loop, run before store()
120
+	{
121
+		Cache.getCache().printCache(); // delegates to the Cache returned by Cache.getCache(); what is printed and where is defined in Cache, not visible here
122
+	}
123
+	
124
+	
125
+	public void store() // second cache step of the per-file loop, run after print() and before clear()
126
+	{
127
+		Cache cache = Cache.getCache();
128
+		cache.store(); // delegates to Cache.store(); presumably persists the cached entries - TODO confirm against Cache
112 129
 		
113 130
 		
114 131
 	}
132
+	
133
+
134
+	//	private void moveDoneFiles(File oneDayOfDO) {
135
+	//		
136
+	//		try{
137
+	//			
138
+	//		
139
+	//		List<File> files = input.getFilesFromDate(oneDayOfDO);
140
+	//		
141
+	//		
142
+	//		
143
+	//		for(File file: files)
144
+	//		{
145
+	//			File doneFolder = new File("/home/ailton/Monografia/done/"+file.getName());
146
+	//			Files.move(file.toPath(), doneFolder.toPath());
147
+	//			
148
+	//		}}catch(Exception e)
149
+	//		{
150
+	//			e.printStackTrace();
151
+	//		}
152
+	//		
153
+	//		
154
+	//	}
115 155
 
116 156
 	public List<DOProcess> getAllProcess()
117 157
 	{

+ 82 - 50
src/graph/extractor/graph/builder/STJBuilder.java

@@ -2,6 +2,8 @@ package graph.extractor.graph.builder;
2 2
 
3 3
 
4 4
 
5
+import org.apache.commons.lang3.StringUtils;
6
+
5 7
 import dao.entity.DOProcess;
6 8
 import dao.entity.Entity;
7 9
 import dao.entity.Judge;
@@ -16,6 +18,9 @@ public class STJBuilder extends BuilderFactory{
16 18
 	private String normalizeJudgeName(String judgeName)
17 19
 	{
18 20
 		
21
+	    judgeName = StringUtils.stripAccents(judgeName);
22
+
23
+		
19 24
 		if(judgeName.contains("(") && !judgeName.startsWith("(") )
20 25
 		{
21 26
 
@@ -44,7 +49,8 @@ public class STJBuilder extends BuilderFactory{
44 49
 	
45 50
 	private String normalizeEntityName(String entityName)
46 51
 	{
47
-		String originalEntityName = entityName.trim();
52
+		
53
+		entityName = StringUtils.stripAccents(entityName);
48 54
 		
49 55
 		if(entityName.contains("(PRESO)"))
50 56
 		{
@@ -62,6 +68,25 @@ public class STJBuilder extends BuilderFactory{
62 68
 		{
63 69
 			entityName = entityName.split("\\(")[0].trim();
64 70
 		}
71
+		if(entityName.contains("ASSIST")) // cut the name at the "ASSIST" marker and keep only the text before it
72
+		{
73
+			
74
+			entityName = entityName.split("ASSIST", 2)[0].trim(); // limit 2 keeps the leading "" when the name starts with the marker; plain split() returns an empty array when the name equals the marker and [0] throws
75
+			entityName = entityName.isEmpty() ? entityName : entityName.substring(0, entityName.length() -1); // drop the single character left before the marker; guarded so an empty prefix cannot throw StringIndexOutOfBoundsException
76
+			
77
+			
78
+			
79
+		}
80
+		if(entityName.contains("CONVOCADA")) // same treatment for the "CONVOCADA" marker
81
+		{
82
+			
83
+			entityName = entityName.split("CONVOCADA", 2)[0].trim(); // limit 2: see the ASSIST branch
84
+			entityName = entityName.isEmpty() ? entityName : entityName.substring(0, entityName.length() -1); // guarded for the same reason as the ASSIST branch
85
+			
86
+			
87
+			
88
+		}
89
+		
65 90
 		
66 91
 		
67 92
 		if(entityName.length() > 0)
@@ -75,9 +100,14 @@ public class STJBuilder extends BuilderFactory{
75 100
 	private Lawyer normalizeLawyerName(Lawyer lawyer)
76 101
 	{
77 102
 		
78
-		//String originalEntityName = entityName.trim();
103
+		
104
+		
79 105
 		String nameNormalized = lawyer.getName().replaceAll("\\r|\\n", " ").trim();
80
-
106
+		nameNormalized = StringUtils.stripAccents(nameNormalized);
107
+		
108
+		nameNormalized = nameNormalized.replaceAll("\"", "").trim();
109
+		nameNormalized = nameNormalized.replaceAll("/", "").trim();
110
+		 
81 111
 
82 112
 		try{
83 113
 			
@@ -92,6 +122,13 @@ public class STJBuilder extends BuilderFactory{
92 122
 				}
93 123
 				nameNormalized = nameNormalized.split("-")[0].trim();
94 124
 			}
125
+			if(nameNormalized.contains("ASSIST"))
126
+			{
127
+				
128
+				nameNormalized = nameNormalized.split("ASSIST")[0].trim();
129
+				nameNormalized = nameNormalized.substring(0, nameNormalized.length() -1);
130
+				
131
+			}
95 132
 			
96 133
 			if(nameNormalized.contains("E OUTRO"))
97 134
 			{
@@ -103,6 +140,36 @@ public class STJBuilder extends BuilderFactory{
103 140
 				nameNormalized = TYPES.NAMEDEFAULT;
104 141
 			}
105 142
 			
143
+			if(nameNormalized.contains("EM CAUSA")  )
144
+			{
145
+				nameNormalized = nameNormalized.split("EM CAUSA")[0].trim();
146
+				nameNormalized = nameNormalized.substring(0, nameNormalized.length() -1).trim();
147
+				
148
+			}
149
+				
150
+			
151
+			if(nameNormalized.endsWith(" E"))
152
+			{
153
+				nameNormalized = nameNormalized.split(" E")[0].trim();
154
+			}
155
+			
156
+			
157
+			if(this.getLowerCasePCT(nameNormalized) > 70)
158
+			{
159
+				nameNormalized = TYPES.NAMEDEFAULT;
160
+			}
161
+			
162
+			
163
+			if(nameNormalized.equals(""))
164
+			{
165
+				lawyer.setName(lawyer.getName());
166
+			}
167
+			else
168
+			{
169
+				lawyer.setName(nameNormalized);
170
+			}
171
+			
172
+			
106 173
 			
107 174
 		}
108 175
 		catch(Exception e)
@@ -112,37 +179,6 @@ public class STJBuilder extends BuilderFactory{
112 179
 		}
113 180
 
114 181
 
115
-
116
-		
117
-		
118
-		
119
-		if(nameNormalized.contains("EM CAUSA")  )
120
-		{
121
-			nameNormalized = nameNormalized.split("EM CAUSA")[0].trim();
122
-			nameNormalized = nameNormalized.substring(0, nameNormalized.length() -1).trim();
123
-			
124
-		}
125
-			
126
-		if(nameNormalized.equals("E OUTRO(S)"))
127
-		{
128
-			nameNormalized = TYPES.NAMEDEFAULT;
129
-		}
130
-			
131
-		
132
-		if(nameNormalized.endsWith(" E"))
133
-		{
134
-			nameNormalized = nameNormalized.split(" E")[0].trim();
135
-		}
136
-		
137
-		if(nameNormalized.equals(""))
138
-		{
139
-			lawyer.setName(lawyer.getName());
140
-		}
141
-		else
142
-		{
143
-			lawyer.setName(nameNormalized);
144
-		}
145
-		
146 182
 		return lawyer;
147 183
 		
148 184
 		
@@ -179,8 +215,12 @@ public class STJBuilder extends BuilderFactory{
179 215
 				if(type.contains("ADVOGAD") || type.contains("PROCURADOR") )
180 216
 				{
181 217
 				
182
-					
183
-					entity = this.normalizeLawyerName(new Lawyer(canditate.split(":")[1].trim()) );
218
+					Lawyer lawyer = this.normalizeLawyerName(new Lawyer(canditate.split(":")[1].trim())); // normalize the text after the first ':' of the candidate line as a lawyer name
219
+					entity.setName( lawyer.getName());
220
+					if(lawyer.getOABNumber()!= null && !lawyer.getOABNumber().equals("")) // copy the OAB number only when one is present; the previous test was inverted and copied it only when it was the empty string
221
+					{
222
+						entity.setOABNumber(lawyer.getOABNumber());
223
+					}
184 224
 					entity.setType(TYPES.LAWYER);
185 225
 					if(type.contains("ADVOGADOS") )
186 226
 						nextLawyer = true;
@@ -196,7 +236,8 @@ public class STJBuilder extends BuilderFactory{
196 236
 				{
197 237
 					nextLawyer = false;
198 238
 					entity.setType(TYPES.LAWYER);
199
-					entity.setName(canditate);
239
+					Lawyer lawyer = this.normalizeLawyerName(new Lawyer(canditate));
240
+					entity.setName(lawyer.getName());
200 241
 
201 242
 				}
202 243
 				else{
@@ -228,19 +269,6 @@ public class STJBuilder extends BuilderFactory{
228 269
 
229 270
 		DOProcess proc = new DOProcess();
230 271
 
231
-//		try{
232
-//			File file = new File("/home/ailton/Monografia/log");
233
-//			FileWriter fileWriter = new FileWriter(file, true);
234
-//			PrintWriter out = new PrintWriter(fileWriter);
235
-//			out.println(process);
236
-//			out.println();
237
-//			out.close();
238
-//
239
-//		}catch(Exception e)
240
-//		{
241
-//			e.printStackTrace();
242
-//		}
243
-
244 272
 
245 273
 		proc.setOrign("DJ_STJ");
246 274
 
@@ -273,6 +301,10 @@ public class STJBuilder extends BuilderFactory{
273 301
 
274 302
 			String processType = firstLine.toLowerCase().split("nº")[0].toLowerCase();
275 303
 			firstLine = firstLine.substring(processType.length());
304
+			processType = StringUtils.stripAccents(processType);
305
+			processType = processType.replaceAll("[^A-Za-z0-9 ]", "");
306
+
307
+			
276 308
 			ProcessTypes procType = new ProcessTypes(processType);
277 309
 			proc.setProcessType(procType);
278 310
 

+ 16 - 10
src/graph/output/database/DatabaseOutput.java

@@ -1,15 +1,11 @@
1 1
 package graph.output.database;
2 2
 
3
-import java.io.IOException;
4 3
 import java.util.List;
5 4
 import java.util.Random;
6 5
 
7
-import dao.connection.LuceneConnector;
8 6
 import dao.entity.DOProcess;
9 7
 import dao.lucene.relations.DAOGeneralProcess;
10
-import dao.lucene.tables.DAOEntity;
11
-import dao.lucene.tables.DAOJudge;
12
-import dao.lucene.tables.DAOLawyer;
8
+
13 9
 import graph.output.OutPutFactory;
14 10
 
15 11
 public class DatabaseOutput extends OutPutFactory {
@@ -43,9 +39,9 @@ public class DatabaseOutput extends OutPutFactory {
43 39
 		
44 40
 		int counter  = 0;
45 41
 		this.runned = 0;
46
-		DAOJudge daoJudge = DAOJudge.getInstance();
47
-		DAOEntity daoEntity = DAOEntity.getInstance();
48
-		DAOLawyer daoLawyer = DAOLawyer.getInstance();
42
+//		DAOJudge daoJudge = DAOJudge.getInstance();
43
+//		DAOEntity daoEntity = DAOEntity.getInstance();
44
+//		DAOLawyer daoLawyer = DAOLawyer.getInstance();
49 45
 		
50 46
 		while(allProcess.size() > 0)
51 47
 		{
@@ -89,8 +85,18 @@ public class DatabaseOutput extends OutPutFactory {
89 85
 		
90 86
 		
91 87
 		System.out.println("Foram atualizados " +runned+". Skip: " + (processSize-runned));
92
-
93
-		System.gc();
88
+		
89
+		try{
90
+			
91
+			
92
+			
93
+			//LuceneConnector.getIndexDisk().commit();
94
+			System.gc();
95
+		}catch(Exception e)
96
+		{
97
+			e.printStackTrace();
98
+		}
99
+		
94 100
 		
95 101
 		
96 102
 		

+ 48 - 0
test

@@ -0,0 +1,48 @@
1
+18102013
2
+28032016
3
+11022015
4
+20042015
5
+08042015
6
+04112014
7
+11072014
8
+29092015
9
+01092014
10
+04102013
11
+22042016
12
+13012014
13
+05112013
14
+10102013
15
+18122014
16
+13102015
17
+27122013
18
+04042016
19
+14102015
20
+15092015
21
+05122013
22
+09052016
23
+17032016
24
+03122014
25
+29092015
26
+25042014
27
+24102013
28
+04112015
29
+01102013
30
+14102015
31
+26052015
32
+25032015
33
+10072014
34
+29072014
35
+21012016
36
+06042016
37
+29112013
38
+22012016
39
+25072014
40
+10032014
41
+23102015
42
+01122014
43
+07102013
44
+01062015
45
+17032014
46
+28012014
47
+23082016
48
+28112014