Browse Source

last changes

ailton 7 years ago
parent
commit
7fd1f3ad02

+ 6 - 0
pom.xml

24
   	<artifactId>gs-core</artifactId>
24
   	<artifactId>gs-core</artifactId>
25
   	<version>1.2</version>
25
   	<version>1.2</version>
26
   </dependency>
26
   </dependency>
27
+  <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
28
+ <dependency>
29
+    <groupId>org.apache.commons</groupId>
30
+    <artifactId>commons-lang3</artifactId>
31
+    <version>3.4</version>
32
+  </dependency>
27
   <dependency>
33
   <dependency>
28
   	<groupId>org.jgrapht</groupId>
34
   	<groupId>org.jgrapht</groupId>
29
   	<artifactId>jgrapht-core</artifactId>
35
   	<artifactId>jgrapht-core</artifactId>

+ 82 - 42
src/graph/extractor/graph/builder/BuilderFactory.java

1
 package graph.extractor.graph.builder;
1
 package graph.extractor.graph.builder;
2
 
2
 
3
 import java.io.File;
3
 import java.io.File;
4
-import java.io.IOException;
5
-import java.nio.file.Files;
6
 import java.util.ArrayList;
4
 import java.util.ArrayList;
7
 import java.util.List;
5
 import java.util.List;
8
-import java.util.Random;
9
-
10
-import org.apache.lucene.index.IndexWriter;
11
-
12
-import dao.connection.LuceneConnector;
6
+import java.util.Random;	
13
 import dao.entity.DOProcess;
7
 import dao.entity.DOProcess;
14
-import dao.lucene.tables.DAOJudge;
8
+import dao.lucene.cache.Cache;
15
 import graph.input.file.InputFactory;
9
 import graph.input.file.InputFactory;
16
 import graph.output.OutPutFactory;
10
 import graph.output.OutPutFactory;
17
 
11
 
19
 
13
 
20
 	protected List<DOProcess> processes;
14
 	protected List<DOProcess> processes;
21
 	protected InputFactory input;
15
 	protected InputFactory input;
22
-
16
+	public static long processIndex = 1;
17
+	
18
+	
23
 	public BuilderFactory(){
19
 	public BuilderFactory(){
24
 		this.processes = new ArrayList<DOProcess>();
20
 		this.processes = new ArrayList<DOProcess>();
25
 
21
 
37
 		long originalSize = allDateFiles.size();
33
 		long originalSize = allDateFiles.size();
38
 		
34
 		
39
 		
35
 		
36
+
40
 		while(allDateFiles.size() > 0)
37
 		while(allDateFiles.size() > 0)
41
 		{
38
 		{
42
 			try{
39
 			try{
43
 				Random rand = new Random();
40
 				Random rand = new Random();
44
-				
41
+
45
 				int randNumber = Math.abs(rand.nextInt(allDateFiles.size()));
42
 				int randNumber = Math.abs(rand.nextInt(allDateFiles.size()));
46
 				File oneDayOfDO = allDateFiles.get(randNumber);
43
 				File oneDayOfDO = allDateFiles.get(randNumber);
47
-				
44
+
48
 				List<String> allProcess  = input.getAllProcess(oneDayOfDO);
45
 				List<String> allProcess  = input.getAllProcess(oneDayOfDO);
49
-				
46
+
50
 				System.out.println("Processando dia "+oneDayOfDO.getName() + "---> " + allProcess.size() +" processos");
47
 				System.out.println("Processando dia "+oneDayOfDO.getName() + "---> " + allProcess.size() +" processos");
51
 				System.out.println("Processados " +(originalSize - allDateFiles.size())+" de " + originalSize+" diarios");
48
 				System.out.println("Processados " +(originalSize - allDateFiles.size())+" de " + originalSize+" diarios");
49
+
50
+
51
+
52
 				for(String oneProcess: allProcess)
52
 				for(String oneProcess: allProcess)
53
 				{
53
 				{
54
 					DOProcess process = this.extractProcess(oneProcess);
54
 					DOProcess process = this.extractProcess(oneProcess);
55
 					if(process != null)
55
 					if(process != null)
56
 					{
56
 					{
57
 						process.setProcessDate(oneDayOfDO.getName());
57
 						process.setProcessDate(oneDayOfDO.getName());
58
+						process.setName(process.getProcessNumber() + oneDayOfDO.getName());
58
 						this.processes.add(process);
59
 						this.processes.add(process);
59
 					}
60
 					}
60
-					
61
-					
61
+
62
 				}
62
 				}
63
+
64
+
63
 				output.buildOutput(this.processes);
65
 				output.buildOutput(this.processes);
64
-				
66
+
67
+
68
+
65
 				//this.moveDoneFiles(oneDayOfDO);
69
 				//this.moveDoneFiles(oneDayOfDO);
66
 				this.processes.clear();
70
 				this.processes.clear();
67
 				allProcess.clear();
71
 				allProcess.clear();
68
 				allDateFiles.remove(oneDayOfDO);
72
 				allDateFiles.remove(oneDayOfDO);
69
 				
73
 				
74
+				this.print();
75
+				this.store();
76
+				this.clear();
77
+				this.restart();
78
+				System.gc();
79
+
70
 			}catch(Exception e)
80
 			}catch(Exception e)
71
 			{
81
 			{
72
 				e.printStackTrace();
82
 				e.printStackTrace();
73
 			}
83
 			}
74
-			
84
+
75
 			System.out.println( "Completado " +(originalSize - allDateFiles.size())+" de " + originalSize +". Faltam "+ allDateFiles.size() +" Processos");
85
 			System.out.println( "Completado " +(originalSize - allDateFiles.size())+" de " + originalSize +". Faltam "+ allDateFiles.size() +" Processos");
86
+	
87
+
88
+
89
+
76
 			System.out.println("\n\n\n\n");
90
 			System.out.println("\n\n\n\n");
77
-			
78
 
91
 
79
 		}
92
 		}
80
-		DAOJudge daoJudge = DAOJudge.getInstance();
81
-		daoJudge.store();
82
-		try {
83
-			LuceneConnector.getIndexDisk().commit();
84
-		} catch (IOException e) {
85
-			// TODO Auto-generated catch block
86
-			e.printStackTrace();
87
-		}
88
-		
89
-		
90
 
93
 
94
+		//		afterInsertAll();
91
 
95
 
92
-	}
96
+	
93
 
97
 
94
-	private void moveDoneFiles(File oneDayOfDO) {
95
-		
96
-		try{
97
-			
98
+
99
+
100
+
101
+
102
+
103
+
104
+	}
105
+	
106
+	private void restart() {
107
+		Cache.getCache();
98
 		
108
 		
99
-		List<File> files = input.getFilesFromDate(oneDayOfDO);
109
+	}
110
+
111
+	private void clear() {
112
+		// TODO Auto-generated method stub
113
+		Cache.delete();
100
 		
114
 		
101
 		
115
 		
102
 		
116
 		
103
-		for(File file: files)
104
-		{
105
-			File doneFolder = new File("/home/ailton/Monografia/done/"+file.getName());
106
-			Files.move(file.toPath(), doneFolder.toPath());
107
-			
108
-		}}catch(Exception e)
109
-		{
110
-			e.printStackTrace();
111
-		}
117
+	}
118
+
119
+	public void print()
120
+	{
121
+		Cache.getCache().printCache();
122
+	}
123
+	
124
+	
125
+	public void store()
126
+	{
127
+		Cache cache = Cache.getCache();
128
+		cache.store();
112
 		
129
 		
113
 		
130
 		
114
 	}
131
 	}
132
+	
133
+
134
+	//	private void moveDoneFiles(File oneDayOfDO) {
135
+	//		
136
+	//		try{
137
+	//			
138
+	//		
139
+	//		List<File> files = input.getFilesFromDate(oneDayOfDO);
140
+	//		
141
+	//		
142
+	//		
143
+	//		for(File file: files)
144
+	//		{
145
+	//			File doneFolder = new File("/home/ailton/Monografia/done/"+file.getName());
146
+	//			Files.move(file.toPath(), doneFolder.toPath());
147
+	//			
148
+	//		}}catch(Exception e)
149
+	//		{
150
+	//			e.printStackTrace();
151
+	//		}
152
+	//		
153
+	//		
154
+	//	}
115
 
155
 
116
 	public List<DOProcess> getAllProcess()
156
 	public List<DOProcess> getAllProcess()
117
 	{
157
 	{

+ 82 - 50
src/graph/extractor/graph/builder/STJBuilder.java

2
 
2
 
3
 
3
 
4
 
4
 
5
+import org.apache.commons.lang3.StringUtils;
6
+
5
 import dao.entity.DOProcess;
7
 import dao.entity.DOProcess;
6
 import dao.entity.Entity;
8
 import dao.entity.Entity;
7
 import dao.entity.Judge;
9
 import dao.entity.Judge;
16
 	private String normalizeJudgeName(String judgeName)
18
 	private String normalizeJudgeName(String judgeName)
17
 	{
19
 	{
18
 		
20
 		
21
+	    judgeName = StringUtils.stripAccents(judgeName);
22
+
23
+		
19
 		if(judgeName.contains("(") && !judgeName.startsWith("(") )
24
 		if(judgeName.contains("(") && !judgeName.startsWith("(") )
20
 		{
25
 		{
21
 
26
 
44
 	
49
 	
45
 	private String normalizeEntityName(String entityName)
50
 	private String normalizeEntityName(String entityName)
46
 	{
51
 	{
47
-		String originalEntityName = entityName.trim();
52
+		
53
+		entityName = StringUtils.stripAccents(entityName);
48
 		
54
 		
49
 		if(entityName.contains("(PRESO)"))
55
 		if(entityName.contains("(PRESO)"))
50
 		{
56
 		{
62
 		{
68
 		{
63
 			entityName = entityName.split("\\(")[0].trim();
69
 			entityName = entityName.split("\\(")[0].trim();
64
 		}
70
 		}
71
+		if(entityName.contains("ASSIST"))
72
+		{
73
+			
74
+			entityName = entityName.split("ASSIST")[0].trim();
75
+			entityName = entityName.substring(0, entityName.length() -1);
76
+			
77
+			
78
+			
79
+		}
80
+		if(entityName.contains("CONVOCADA"))
81
+		{
82
+			
83
+			entityName = entityName.split("CONVOCADA")[0].trim();
84
+			entityName = entityName.substring(0, entityName.length() -1);
85
+			
86
+			
87
+			
88
+		}
89
+		
65
 		
90
 		
66
 		
91
 		
67
 		if(entityName.length() > 0)
92
 		if(entityName.length() > 0)
75
 	private Lawyer normalizeLawyerName(Lawyer lawyer)
100
 	private Lawyer normalizeLawyerName(Lawyer lawyer)
76
 	{
101
 	{
77
 		
102
 		
78
-		//String originalEntityName = entityName.trim();
103
+		
104
+		
79
 		String nameNormalized = lawyer.getName().replaceAll("\\r|\\n", " ").trim();
105
 		String nameNormalized = lawyer.getName().replaceAll("\\r|\\n", " ").trim();
80
-
106
+		nameNormalized = StringUtils.stripAccents(nameNormalized);
107
+		
108
+		nameNormalized = nameNormalized.replaceAll("\"", "").trim();
109
+		nameNormalized = nameNormalized.replaceAll("/", "").trim();
110
+		 
81
 
111
 
82
 		try{
112
 		try{
83
 			
113
 			
92
 				}
122
 				}
93
 				nameNormalized = nameNormalized.split("-")[0].trim();
123
 				nameNormalized = nameNormalized.split("-")[0].trim();
94
 			}
124
 			}
125
+			if(nameNormalized.contains("ASSIST"))
126
+			{
127
+				
128
+				nameNormalized = nameNormalized.split("ASSIST")[0].trim();
129
+				nameNormalized = nameNormalized.substring(0, nameNormalized.length() -1);
130
+				
131
+			}
95
 			
132
 			
96
 			if(nameNormalized.contains("E OUTRO"))
133
 			if(nameNormalized.contains("E OUTRO"))
97
 			{
134
 			{
103
 				nameNormalized = TYPES.NAMEDEFAULT;
140
 				nameNormalized = TYPES.NAMEDEFAULT;
104
 			}
141
 			}
105
 			
142
 			
143
+			if(nameNormalized.contains("EM CAUSA")  )
144
+			{
145
+				nameNormalized = nameNormalized.split("EM CAUSA")[0].trim();
146
+				nameNormalized = nameNormalized.substring(0, nameNormalized.length() -1).trim();
147
+				
148
+			}
149
+				
150
+			
151
+			if(nameNormalized.endsWith(" E"))
152
+			{
153
+				nameNormalized = nameNormalized.split(" E")[0].trim();
154
+			}
155
+			
156
+			
157
+			if(this.getLowerCasePCT(nameNormalized) > 70)
158
+			{
159
+				nameNormalized = TYPES.NAMEDEFAULT;
160
+			}
161
+			
162
+			
163
+			if(nameNormalized.equals(""))
164
+			{
165
+				lawyer.setName(lawyer.getName());
166
+			}
167
+			else
168
+			{
169
+				lawyer.setName(nameNormalized);
170
+			}
171
+			
172
+			
106
 			
173
 			
107
 		}
174
 		}
108
 		catch(Exception e)
175
 		catch(Exception e)
112
 		}
179
 		}
113
 
180
 
114
 
181
 
115
-
116
-		
117
-		
118
-		
119
-		if(nameNormalized.contains("EM CAUSA")  )
120
-		{
121
-			nameNormalized = nameNormalized.split("EM CAUSA")[0].trim();
122
-			nameNormalized = nameNormalized.substring(0, nameNormalized.length() -1).trim();
123
-			
124
-		}
125
-			
126
-		if(nameNormalized.equals("E OUTRO(S)"))
127
-		{
128
-			nameNormalized = TYPES.NAMEDEFAULT;
129
-		}
130
-			
131
-		
132
-		if(nameNormalized.endsWith(" E"))
133
-		{
134
-			nameNormalized = nameNormalized.split(" E")[0].trim();
135
-		}
136
-		
137
-		if(nameNormalized.equals(""))
138
-		{
139
-			lawyer.setName(lawyer.getName());
140
-		}
141
-		else
142
-		{
143
-			lawyer.setName(nameNormalized);
144
-		}
145
-		
146
 		return lawyer;
182
 		return lawyer;
147
 		
183
 		
148
 		
184
 		
179
 				if(type.contains("ADVOGAD") || type.contains("PROCURADOR") )
215
 				if(type.contains("ADVOGAD") || type.contains("PROCURADOR") )
180
 				{
216
 				{
181
 				
217
 				
182
-					
183
-					entity = this.normalizeLawyerName(new Lawyer(canditate.split(":")[1].trim()) );
218
+					Lawyer lawyer = this.normalizeLawyerName(new Lawyer(canditate.split(":")[1].trim()));
219
+					entity.setName( lawyer.getName());
220
+					if(lawyer.getOABNumber()!= null && lawyer.getOABNumber().equals(""))
221
+					{
222
+						entity.setOABNumber(lawyer.getOABNumber());
223
+					}
184
 					entity.setType(TYPES.LAWYER);
224
 					entity.setType(TYPES.LAWYER);
185
 					if(type.contains("ADVOGADOS") )
225
 					if(type.contains("ADVOGADOS") )
186
 						nextLawyer = true;
226
 						nextLawyer = true;
196
 				{
236
 				{
197
 					nextLawyer = false;
237
 					nextLawyer = false;
198
 					entity.setType(TYPES.LAWYER);
238
 					entity.setType(TYPES.LAWYER);
199
-					entity.setName(canditate);
239
+					Lawyer lawyer = this.normalizeLawyerName(new Lawyer(canditate));
240
+					entity.setName(lawyer.getName());
200
 
241
 
201
 				}
242
 				}
202
 				else{
243
 				else{
228
 
269
 
229
 		DOProcess proc = new DOProcess();
270
 		DOProcess proc = new DOProcess();
230
 
271
 
231
-//		try{
232
-//			File file = new File("/home/ailton/Monografia/log");
233
-//			FileWriter fileWriter = new FileWriter(file, true);
234
-//			PrintWriter out = new PrintWriter(fileWriter);
235
-//			out.println(process);
236
-//			out.println();
237
-//			out.close();
238
-//
239
-//		}catch(Exception e)
240
-//		{
241
-//			e.printStackTrace();
242
-//		}
243
-
244
 
272
 
245
 		proc.setOrign("DJ_STJ");
273
 		proc.setOrign("DJ_STJ");
246
 
274
 
273
 
301
 
274
 			String processType = firstLine.toLowerCase().split("nº")[0].toLowerCase();
302
 			String processType = firstLine.toLowerCase().split("nº")[0].toLowerCase();
275
 			firstLine = firstLine.substring(processType.length());
303
 			firstLine = firstLine.substring(processType.length());
304
+			processType = StringUtils.stripAccents(processType);
305
+			processType = processType.replaceAll("[^A-Za-z0-9 ]", "");
306
+
307
+			
276
 			ProcessTypes procType = new ProcessTypes(processType);
308
 			ProcessTypes procType = new ProcessTypes(processType);
277
 			proc.setProcessType(procType);
309
 			proc.setProcessType(procType);
278
 
310
 

+ 16 - 10
src/graph/output/database/DatabaseOutput.java

1
 package graph.output.database;
1
 package graph.output.database;
2
 
2
 
3
-import java.io.IOException;
4
 import java.util.List;
3
 import java.util.List;
5
 import java.util.Random;
4
 import java.util.Random;
6
 
5
 
7
-import dao.connection.LuceneConnector;
8
 import dao.entity.DOProcess;
6
 import dao.entity.DOProcess;
9
 import dao.lucene.relations.DAOGeneralProcess;
7
 import dao.lucene.relations.DAOGeneralProcess;
10
-import dao.lucene.tables.DAOEntity;
11
-import dao.lucene.tables.DAOJudge;
12
-import dao.lucene.tables.DAOLawyer;
8
+
13
 import graph.output.OutPutFactory;
9
 import graph.output.OutPutFactory;
14
 
10
 
15
 public class DatabaseOutput extends OutPutFactory {
11
 public class DatabaseOutput extends OutPutFactory {
43
 		
39
 		
44
 		int counter  = 0;
40
 		int counter  = 0;
45
 		this.runned = 0;
41
 		this.runned = 0;
46
-		DAOJudge daoJudge = DAOJudge.getInstance();
47
-		DAOEntity daoEntity = DAOEntity.getInstance();
48
-		DAOLawyer daoLawyer = DAOLawyer.getInstance();
42
+//		DAOJudge daoJudge = DAOJudge.getInstance();
43
+//		DAOEntity daoEntity = DAOEntity.getInstance();
44
+//		DAOLawyer daoLawyer = DAOLawyer.getInstance();
49
 		
45
 		
50
 		while(allProcess.size() > 0)
46
 		while(allProcess.size() > 0)
51
 		{
47
 		{
89
 		
85
 		
90
 		
86
 		
91
 		System.out.println("Foram atualizados " +runned+". Skip: " + (processSize-runned));
87
 		System.out.println("Foram atualizados " +runned+". Skip: " + (processSize-runned));
92
-
93
-		System.gc();
88
+		
89
+		try{
90
+			
91
+			
92
+			
93
+			//LuceneConnector.getIndexDisk().commit();
94
+			System.gc();
95
+		}catch(Exception e)
96
+		{
97
+			e.printStackTrace();
98
+		}
99
+		
94
 		
100
 		
95
 		
101
 		
96
 		
102
 		

+ 48 - 0
test

1
+18102013
2
+28032016
3
+11022015
4
+20042015
5
+08042015
6
+04112014
7
+11072014
8
+29092015
9
+01092014
10
+04102013
11
+22042016
12
+13012014
13
+05112013
14
+10102013
15
+18122014
16
+13102015
17
+27122013
18
+04042016
19
+14102015
20
+15092015
21
+05122013
22
+09052016
23
+17032016
24
+03122014
25
+29092015
26
+25042014
27
+24102013
28
+04112015
29
+01102013
30
+14102015
31
+26052015
32
+25032015
33
+10072014
34
+29072014
35
+21012016
36
+06042016
37
+29112013
38
+22012016
39
+25072014
40
+10032014
41
+23102015
42
+01122014
43
+07102013
44
+01062015
45
+17032014
46
+28012014
47
+23082016
48
+28112014