Logging
OpenLineage Java library is based on slf4j when generating logs. Being able to emit logs for various purposes is very helpful when troubleshooting OpenLineage.
Consider the following sample java code that emits OpenLineage events:
package ol.test;
import io.openlineage.client.OpenLineage;
import io.openlineage.client.OpenLineageClient;
import io.openlineage.client.OpenLineage.RunEvent;
import io.openlineage.client.OpenLineage.InputDataset;
import io.openlineage.client.OpenLineage.Job;
import io.openlineage.client.OpenLineage.JobFacets;
import io.openlineage.client.OpenLineage.OutputDataset;
import io.openlineage.client.OpenLineage.Run;
import io.openlineage.client.OpenLineage.RunFacets;
import io.openlineage.client.OpenLineage.RunEvent.EventType;
import io.openlineage.client.transports.HttpTransport;
import java.net.URI;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.List;
import java.util.UUID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* sample openlinage client code
*/
public class OpenLineageClientTest
{
private static Logger logger = LoggerFactory.getLogger(OpenLineageClientTest.class);
public static void main( String[] args )
{
logger.info("Running OpenLineage Client Test...");
try {
OpenLineageClient client = OpenLineageClient.builder()
.transport(
HttpTransport.builder()
.uri("http://localhost:5000")
.apiKey("abcdefghijklmn")
.build())
.build();
// create one start event for testing
RunEvent event = buildEvent(EventType.START, null);
// emit the event
client.emit(event);
// another event to COMPLETE the run
event = buildEvent(EventType.COMPLETE, event.getRun().getRunId());
// emit the second COMPLETE event
client.emit(event);
} catch (Exception e) {
e.printStackTrace();
}
}
// sample code to build event
public static RunEvent buildEvent(EventType eventType, UUID runId) {
ZonedDateTime now = ZonedDateTime.now(ZoneId.of("UTC"));
URI producer = URI.create("producer");
OpenLineage ol = new OpenLineage(producer);
if (runId == null) {
runId = UUID.randomUUID();
}
// run facets
RunFacets runFacets =
ol.newRunFacetsBuilder()
.nominalTime(
ol.newNominalTimeRunFacetBuilder()
.nominalStartTime(now)
.nominalEndTime(now)
.build())
.build();
// a run is composed of run id, and run facets
Run run = ol.newRunBuilder().runId(runId).facets(runFacets).build();
// job facets
JobFacets jobFacets = ol.newJobFacetsBuilder().build();
// job
String name = "jobName";
String namespace = "namespace";
Job job = ol.newJobBuilder().namespace(namespace).name(name).facets(jobFacets).build();
// input dataset
List<InputDataset> inputs =
Arrays.asList(
ol.newInputDatasetBuilder()
.namespace("ins")
.name("input")
.facets(
ol.newDatasetFacetsBuilder()
.version(ol.newDatasetVersionDatasetFacet("input-version"))
.build())
.inputFacets(
ol.newInputDatasetInputFacetsBuilder()
.dataQualityMetrics(
ol.newDataQualityMetricsInputDatasetFacetBuilder()
.rowCount(10L)
.bytes(20L)
.columnMetrics(
ol.newDataQualityMetricsInputDatasetFacetColumnMetricsBuilder()
.put(
"mycol",
ol.newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalBuilder()
.count(10D)
.distinctCount(10L)
.max(30D)
.min(5D)
.nullCount(1L)
.sum(3000D)
.quantiles(
ol.newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantilesBuilder()
.put("25", 52D)
.build())
.build())
.build())
.build())
.build())
.build());
// output dataset
List<OutputDataset> outputs =
Arrays.asList(
ol.newOutputDatasetBuilder()
.namespace("ons")
.name("output")
.facets(
ol.newDatasetFacetsBuilder()
.version(ol.newDatasetVersionDatasetFacet("output-version"))
.build())
.outputFacets(
ol.newOutputDatasetOutputFacetsBuilder()
.outputStatistics(ol.newOutputStatisticsOutputDatasetFacet(10L, 20L))
.build())
.build());
// run state udpate which encapsulates all - with START event in this case
RunEvent runStateUpdate =
ol.newRunEventBuilder()
.eventType(eventType)
.eventTime(now)
.run(run)
.job(job)
.inputs(inputs)
.outputs(outputs)
.build();
return runStateUpdate;
}
}
When you use OpenLineage backend such as Marquez on your local environment, the program would emit OpenLienage events to it.
java ol.test.OpenLineageClientTest
However, this short program does not produce any logging information, as the logging configuration is required to be setup. Below are the examples of adding dependencies of the libraries that you need to use log4j2
as the target implementation for the slf4j, on maven or gradle.
Maven
pom.xml
<dependencies>
...
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.7</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.7</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.7</version>
</dependency>
...
</dependencies>
Gradle
build.gradle
dependencies {
...
implementation "org.apache.logging.log4j:log4j-api:2.7"
implementation "org.apache.logging.log4j:log4j-core:2.7"
implementation "org.apache.logging.log4j:log4j-slf4j-impl:2.7"
...
}
You also need to create a log4j configuration file, log4j2.properties
on the classpath. Here is the sample log configuration.
# Set to debug or trace if log4j initialization is failing
status = warn
# Name of the configuration
name = ConsoleLogConfigDemo
# Console appender configuration
appender.console.type = Console
appender.console.name = consoleLogger
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
# Root logger level
rootLogger.level = debug
# Root logger referring to console appender
rootLogger.appenderRef.stdout.ref = consoleLogger
Re-compiling and running the ol.test.OpenLineageClientTest
again will produce the following outputs:
2022-12-07 08:57:24 INFO OpenLineageClientTest:33 - Running OpenLineage Client Test...
2022-12-07 08:57:25 DEBUG HttpTransport:96 - POST http://localhost:5000/api/v1/lineage: {"eventType":"START","eventTime":"2022-12-07T14:57:25.072781Z","run":{"runId":"0142c998-3416-49e7-92aa-d025c4c93697","facets":{"nominalTime":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/NominalTimeRunFacet.json#/$defs/NominalTimeRunFacet","nominalStartTime":"2022-12-07T14:57:25.072781Z","nominalEndTime":"2022-12-07T14:57:25.072781Z"}}},"job":{"namespace":"namespace","name":"jobName","facets":{}},"inputs":[{"namespace":"ins","name":"input","facets":{"version":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasetVersionDatasetFacet.json#/$defs/DatasetVersionDatasetFacet","datasetVersion":"input-version"}},"inputFacets":{"dataQualityMetrics":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DataQualityMetricsInputDatasetFacet.json#/$defs/DataQualityMetricsInputDatasetFacet","rowCount":10,"bytes":20,"columnMetrics":{"mycol":{"nullCount":1,"distinctCount":10,"sum":3000.0,"count":10.0,"min":5.0,"max":30.0,"quantiles":{"25":52.0}}}}}}],"outputs":[{"namespace":"ons","name":"output","facets":{"version":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasetVersionDatasetFacet.json#/$defs/DatasetVersionDatasetFacet","datasetVersion":"output-version"}},"outputFacets":{"outputStatistics":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/OutputStatisticsOutputDatasetFacet.json#/$defs/OutputStatisticsOutputDatasetFacet","rowCount":10,"size":20}}}],"producer":"producer","schemaURL":"https://openlineage.io/spec/1-0-4/OpenLineage.json#/$defs/RunEvent"}
2022-12-07 08:57:25 DEBUG HttpTransport:96 - POST http://localhost:5000/api/v1/lineage: {"eventType":"COMPLETE","eventTime":"2022-12-07T14:57:25.42041Z","run":{"runId":"0142c998-3416-49e7-92aa-d025c4c93697","facets":{"nominalTime":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/NominalTimeRunFacet.json#/$defs/NominalTimeRunFacet","nominalStartTime":"2022-12-07T14:57:25.42041Z","nominalEndTime":"2022-12-07T14:57:25.42041Z"}}},"job":{"namespace":"namespace","name":"jobName","facets":{}},"inputs":[{"namespace":"ins","name":"input","facets":{"version":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasetVersionDatasetFacet.json#/$defs/DatasetVersionDatasetFacet","datasetVersion":"input-version"}},"inputFacets":{"dataQualityMetrics":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DataQualityMetricsInputDatasetFacet.json#/$defs/DataQualityMetricsInputDatasetFacet","rowCount":10,"bytes":20,"columnMetrics":{"mycol":{"nullCount":1,"distinctCount":10,"sum":3000.0,"count":10.0,"min":5.0,"max":30.0,"quantiles":{"25":52.0}}}}}}],"outputs":[{"namespace":"ons","name":"output","facets":{"version":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasetVersionDatasetFacet.json#/$defs/DatasetVersionDatasetFacet","datasetVersion":"output-version"}},"outputFacets":{"outputStatistics":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/OutputStatisticsOutputDatasetFacet.json#/$defs/OutputStatisticsOutputDatasetFacet","rowCount":10,"size":20}}}],"producer":"producer","schemaURL":"https://openlineage.io/spec/1-0-4/OpenLineage.json#/$defs/RunEvent"}
Logs will also produce meaningful error messages when something does not work correctly. For example, if the backend server does not exist, you would get the following messages in your console output:
2022-12-07 09:15:16 INFO OpenLineageClientTest:33 - Running OpenLineage Client Test...
2022-12-07 09:15:16 DEBUG HttpTransport:96 - POST http://localhost:5000/api/v1/lineage: {"eventType":"START","eventTime":"2022-12-07T15:15:16.668979Z","run":{"runId":"69861937-55ba-43f5-ab5e-fe78ef6a283d","facets":{"nominalTime":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/NominalTimeRunFacet.json#/$defs/NominalTimeRunFacet","nominalStartTime":"2022-12-07T15:15:16.668979Z","nominalEndTime":"2022-12-07T15:15:16.668979Z"}}},"job":{"namespace":"namespace","name":"jobName","facets":{}},"inputs":[{"namespace":"ins","name":"input","facets":{"version":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasetVersionDatasetFacet.json#/$defs/DatasetVersionDatasetFacet","datasetVersion":"input-version"}},"inputFacets":{"dataQualityMetrics":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DataQualityMetricsInputDatasetFacet.json#/$defs/DataQualityMetricsInputDatasetFacet","rowCount":10,"bytes":20,"columnMetrics":{"mycol":{"nullCount":1,"distinctCount":10,"sum":3000.0,"count":10.0,"min":5.0,"max":30.0,"quantiles":{"25":52.0}}}}}}],"outputs":[{"namespace":"ons","name":"output","facets":{"version":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasetVersionDatasetFacet.json#/$defs/DatasetVersionDatasetFacet","datasetVersion":"output-version"}},"outputFacets":{"outputStatistics":{"_producer":"producer","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/OutputStatisticsOutputDatasetFacet.json#/$defs/OutputStatisticsOutputDatasetFacet","rowCount":10,"size":20}}}],"producer":"producer","schemaURL":"https://openlineage.io/spec/1-0-4/OpenLineage.json#/$defs/RunEvent"}
io.openlineage.client.OpenLineageClientException: org.apache.http.conn.HttpHostConnectException: Connect to localhost:5000 [localhost/127.0.0.1, localhost/0:0:0:0:0:0:0:1] failed: Connection refused
at io.openlineage.client.transports.HttpTransport.emit(HttpTransport.java:113)
at io.openlineage.client.OpenLineageClient.emit(OpenLineageClient.java:42)
at ol.test.OpenLineageClientTest.main(OpenLineageClientTest.java:48)
Caused by: org.apache.http.conn.HttpHostConnectException: Connect to localhost:5000 [localhost/127.0.0.1, localhost/0:0:0:0:0:0:0:1] failed: Connection refused
at org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:156)
at org.apache.http.impl.conn.PoolingHttpClientConnectionManager.connect(PoolingHttpClientConnectionManager.java:376)
at org.apache.http.impl.execchain.MainClientExec.establishRoute(MainClientExec.java:393)
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:236)
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186)
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89)
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108)
at io.openlineage.client.transports.HttpTransport.emit(HttpTransport.java:108)
... 2 more
Caused by: java.net.ConnectException: Connection refused
at java.base/sun.nio.ch.Net.pollConnect(Native Method)
at java.base/sun.nio.ch.Net.pollConnectNow(Net.java:672)
at java.base/sun.nio.ch.NioSocketImpl.timedFinishConnect(NioSocketImpl.java:542)
at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:585)
at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:327)
at java.base/java.net.Socket.connect(Socket.java:666)
at org.apache.http.conn.socket.PlainConnectionSocketFactory.connectSocket(PlainConnectionSocketFactory.java:75)
at org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:142)
... 12 more
If you wish to output loggigng message to a file, you can modify the basic configuration by adding a file appender configuration as follows:
# Set to debug or trace if log4j initialization is failing
status = warn
# Name of the configuration
name = ConsoleLogConfigDemo
# Console appender configuration
appender.console.type = Console
appender.console.name = consoleLogger
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
# File appender configuration
appender.file.type = File
appender.file.name = fileLogger
appender.file.fileName = app.log
appender.file.layout.type = PatternLayout
appender.file.layout.pattern = %d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
# Root logger level
rootLogger.level = debug
# Root logger referring to console appender
rootLogger.appenderRef.stdout.ref = consoleLogger
rootLogger.appenderRef.file.ref = fileLogger
And the logs will be saved to a file app.log
.
Outputting logs using log4j2
is just one way of doing it, so below are some additional resources of undersatnding how Java logging works, and other ways to output the logs.