Skip to content

Commit 88d9845

Browse files
authored
flatgraph (#1769)
flatgraph port WIP squashed commits from michael/flatgraph.backup*
1 parent 231816e commit 88d9845

File tree

348 files changed

+86768
-86000
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

348 files changed

+86768
-86000
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
target/
55
codepropertygraph/project/
66
/codepropertygraph/src/main/resources/cpg.json
7+
/codepropertygraph/src/test/resources/cpg.odb.fg
78
private-key.pem
89
travis_wait_*
910
**/*.pyc
@@ -14,7 +15,7 @@ project/.bloop
1415
.project
1516
*.class
1617
/.classpath
17-
/project/project/
18+
/project/project/target
1819
/project/target/
1920
/target
2021
/foo.c

build.sbt

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
name := "codepropertygraph"
22

33
// parsed by project/Versions.scala, updated by updateDependencies.sh
4-
val overflowdbVersion = "1.193"
5-
val overflowdbCodegenVersion = "2.112"
4+
val flatgraphVersion = "0.0.81"
65

76
inThisBuild(
87
List(

codepropertygraph/build.sbt

+3-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ name := "codepropertygraph"
33
dependsOn(Projects.protoBindings, Projects.domainClasses)
44

55
libraryDependencies ++= Seq(
6-
"io.shiftleft" %% "overflowdb-traversal" % Versions.overflowdb,
7-
"io.shiftleft" %% "overflowdb-formats" % Versions.overflowdb,
6+
"io.joern" %% "flatgraph-formats" % Versions.flatgraph,
7+
"io.joern" %% "flatgraph-help" % Versions.flatgraph,
8+
"io.joern" %% "flatgraph-odb-convert" % Versions.flatgraph,
89
"com.github.scopt" %% "scopt" % "4.0.1",
910
"com.github.pathikrit" %% "better-files" % "3.9.2",
1011
"org.slf4j" % "slf4j-api" % "2.0.6",

codepropertygraph/src/main/scala/io/shiftleft/OverflowDbTestInstance.scala

-10
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
package io.shiftleft.codepropertygraph
22

3-
import overflowdb.Graph
4-
import overflowdb.traversal.help.DocSearchPackages
3+
import flatgraph.Graph
4+
import flatgraph.help.DocSearchPackages
5+
import io.shiftleft.codepropertygraph.generated
56

67
/** TODO this is now being generated as well - for now we'll just forward calls to `generated.Cpg` next step is to
78
* remove this class and remove the `generated` part from the generated package
89
*/
910
object Cpg {
10-
implicit val docSearchPackages: DocSearchPackages =
11-
DocSearchPackages("io.shiftleft", "io.joern")
11+
val defaultDocSearchPackage: DocSearchPackages = generated.Cpg.defaultDocSearchPackage
1212

1313
/** Syntactic sugar for `new Cpg(graph)`. Usage: `Cpg(graph)` or simply `Cpg` if you have an `implicit Graph` in scope
1414
*/
15-
def apply(implicit graph: Graph) = new Cpg(graph)
15+
def apply(implicit graph: Graph) = generated.Cpg(graph)
1616

1717
/** Create an empty code property graph
1818
*/
19-
def emptyCpg: Cpg =
20-
new Cpg(emptyGraph)
19+
def emptyCpg: generated.Cpg =
20+
generated.Cpg(emptyGraph)
2121

2222
/** Instantiate cpg with storage. If the storage file already exists, it will load (a subset of) the data into memory.
2323
* Otherwise it will create an empty cpg. In either case, configuring storage means that OverflowDb will be stored to
@@ -26,13 +26,10 @@ object Cpg {
2626
* @param path
2727
* to the storage file, e.g. /home/user1/overflowdb.bin
2828
*/
29-
def withStorage(path: String): Cpg =
30-
new Cpg(generated.Cpg.withStorage(path).graph)
31-
32-
def withConfig(config: overflowdb.Config): Cpg =
33-
Cpg(generated.Cpg.withConfig(config).graph)
29+
def withStorage(path: String, deserializeOnClose: Boolean = true): generated.Cpg =
30+
generated.Cpg.withStorage(java.nio.file.Paths.get(path), deserializeOnClose)
3431

3532
def emptyGraph: Graph =
36-
generated.Cpg.emptyGraph
33+
generated.Cpg.empty.graph
3734

3835
}
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,117 @@
11
package io.shiftleft.codepropertygraph.cpgloading
22

3-
import better.files.File
43
import io.shiftleft.codepropertygraph.generated.Cpg
5-
import io.shiftleft.codepropertygraph.generated.PropertyNames
64
import org.slf4j.{Logger, LoggerFactory}
75

8-
import scala.util.Try
6+
import java.io.FileNotFoundException
7+
import java.nio.charset.StandardCharsets
8+
import java.nio.file.{Files, Path, Paths}
9+
import scala.util.Using
910

1011
object CpgLoader {
12+
private val logger: Logger = LoggerFactory.getLogger(getClass)
1113

12-
private val logger: Logger = LoggerFactory.getLogger(classOf[CpgLoader])
14+
/** Load a Code Property Graph from the given file */
15+
def load(filename: String): Cpg =
16+
load(Paths.get(filename))
1317

14-
/** Load a Code Property Graph
15-
*
16-
* @param filename
17-
* name of file that stores the code property graph
18-
* @param config
19-
* loader configuration
18+
/** Load a Code Property Graph from the given file - persist in given second file. I.e. the given input file will not
19+
* be modified, all changes will be written to the given 'persistTo' file.
2020
*/
21-
def load(filename: String, config: CpgLoaderConfig = CpgLoaderConfig()): Cpg =
22-
new CpgLoader().load(filename, config)
21+
def load(from: String, persistTo: String): Cpg =
22+
load(Paths.get(from), Paths.get(persistTo))
2323

24-
/** Load Code Property Graph from an overflow DB file
25-
*
26-
* @param config
27-
* loader config
24+
/** Load a Code Property Graph from the given file
2825
*
29-
* This methods loads the CPG from an existing overflow DB file, specified in config.overflowDbConfig. In particular,
30-
* this config specifies the filename. For example, to load the database at "foo.db", you can issue the following:
31-
*
32-
* val odbConfig = Config.withDefaults().withStorageLocation(config.spPath) val config =
33-
* CpgLoaderConfig().withOverflowConfig(odbConfig) CpgLoader.loadFromOverflowDb(config)
26+
* Notes:
27+
* - detects the format as either flatgraph, overflowdb or proto
28+
* - a flatgraph storage is opened straight away
29+
* - OverflowDb and proto formats are first converted to flatgraph, which is written to the `persistTo` file
30+
* - OverflowDb and proto formats are first converted to flatgraph, and therefore we create a new flatgraph storage
31+
* path, which can be obtained via `cpg.graph.storagePathMaybe`
3432
*/
35-
def loadFromOverflowDb(config: CpgLoaderConfig = CpgLoaderConfig()): Cpg = {
36-
new CpgLoader().loadFromOverflowDb(config)
33+
def load(path: Path): Cpg = {
34+
val absolutePath = path.toAbsolutePath
35+
if (!Files.exists(absolutePath)) {
36+
throw new FileNotFoundException(s"given input file $absolutePath does not exist")
37+
} else if (isProtoFormat(absolutePath)) {
38+
load(path, persistTo = absolutePath.resolveSibling(s"${path.getFileName}.fg"))
39+
} else if (isOverflowDbFormat(absolutePath)) {
40+
load(absolutePath, persistTo = path.resolveSibling(s"${path.getFileName}.fg"))
41+
} else {
42+
// assuming it's flatgraph format
43+
Cpg.withStorage(absolutePath)
44+
}
3745
}
3846

39-
/** Create any indexes necessary for quick access.
40-
*
41-
* @param cpg
42-
* the CPG to create indexes in
43-
*/
44-
def createIndexes(cpg: Cpg): Unit =
45-
new CpgLoader().createIndexes(cpg)
46-
47-
/** Determine whether the CPG is a legacy (proto) CPG
47+
/** Load a Code Property Graph from the given file, but persist it in the given second file. I.e. the given input file
48+
* will not be modified, all changes will be written to the given 'persistTo' file.
4849
*
49-
* @param filename
50-
* name of the file to probe
50+
* Notes:
51+
* - if the given 'persistTo' file already exists, it will be overwritten
52+
* - detects the format as either flatgraph, overflowdb or proto
53+
* - a flatgraph storage is copied to the `persistTo` file and then opened straight away
54+
* - OverflowDb and proto formats are first converted to flatgraph, which is written to the `persistTo` file
5155
*/
52-
def isLegacyCpg(filename: String): Boolean =
53-
isLegacyCpg(File(filename))
54-
55-
/** Determine whether the CPG is a legacy (proto) CPG
56-
*
57-
* @param file
58-
* file to probe
59-
*/
60-
def isLegacyCpg(file: File): Boolean = {
61-
val bytes = file.bytes
62-
Try {
63-
bytes.next() == 'P' && bytes.next() == 'K'
64-
}.getOrElse(false)
56+
def load(from: Path, persistTo: Path): Cpg = {
57+
val absolutePath = from.toAbsolutePath
58+
if (persistTo != from)
59+
Files.deleteIfExists(persistTo)
60+
61+
if (!Files.exists(absolutePath)) {
62+
throw new FileNotFoundException(s"given input file $absolutePath does not exist")
63+
} else if (isProtoFormat(absolutePath)) {
64+
logger.debug(s"Converting $from from proto cpg into new flatgraph storage: $persistTo")
65+
ProtoCpgLoader.loadFromProtoZip(absolutePath.toString, Option(persistTo))
66+
} else if (isOverflowDbFormat(absolutePath)) {
67+
loadFromOverflowDb(absolutePath, persistTo)
68+
} else if (isFlatgraphFormat(absolutePath)) {
69+
Files.copy(absolutePath, persistTo)
70+
Cpg.withStorage(persistTo)
71+
} else {
72+
throw new AssertionError(
73+
s"unknown file format - we probed the first bytes but it didn't look like one of our known formats (proto.zip, flatgraph, overflowdb)"
74+
)
75+
}
6576
}
6677

67-
}
78+
/** Determine whether the CPG is a legacy (proto) CPG */
79+
def isProtoFormat(path: Path): Boolean =
80+
probeFirstBytes(path, "PK")
6881

69-
private class CpgLoader {
82+
/** Determine whether the CPG is a proto CPG */
83+
def isProtoFormat(filename: String): Boolean =
84+
isProtoFormat(Paths.get(filename))
7085

71-
import CpgLoader.logger
86+
def isOverflowDbFormat(path: Path): Boolean =
87+
probeFirstBytes(path, "H:2")
7288

73-
def load(filename: String, config: CpgLoaderConfig = CpgLoaderConfig.withoutOverflow): Cpg = {
74-
logger.debug("Loading " + filename)
89+
def isFlatgraphFormat(path: Path): Boolean =
90+
probeFirstBytes(path, "FLT GRPH") // flatgraph.storage.MagicBytesString
7591

76-
val cpg =
77-
ProtoCpgLoader.loadFromProtoZip(filename, config.overflowDbConfig)
78-
if (config.createIndexes) { createIndexes(cpg) }
79-
cpg
92+
/** Load Code Property Graph from an overflow DB file, by first converting it into a flatgraph binary */
93+
def loadFromOverflowDb(path: Path, persistTo: Path): Cpg = {
94+
logger.info(s"Converting $path from overflowdb to new flatgraph storage: $persistTo")
95+
flatgraph.convert.Convert.convertOdbToFlatgraph(overflowDbFile = path, outputFile = persistTo)
96+
Cpg.withStorage(persistTo)
8097
}
8198

82-
def loadFromOverflowDb(config: CpgLoaderConfig = CpgLoaderConfig()): Cpg = {
83-
val cpg = Cpg.withConfig(config.overflowDbConfig)
84-
if (config.createIndexes) { createIndexes(cpg) }
85-
cpg
99+
/** Determine whether the CPG is a legacy (proto) CPG */
100+
@deprecated("use `isProtoCpg` instead")
101+
def isLegacyCpg(filename: String): Boolean =
102+
isProtoFormat(Paths.get(filename))
103+
104+
/** Determine whether the CPG is a legacy (proto) CPG */
105+
@deprecated("use `isProtoCpg` instead")
106+
def isLegacyCpg(path: Path): Boolean =
107+
isProtoFormat(path)
108+
109+
private def probeFirstBytes(path: Path, probeFor: String): Boolean = {
110+
Using(Files.newInputStream(path)) { is =>
111+
val buffer = new Array[Byte](probeFor.size)
112+
is.read(buffer)
113+
new String(buffer, StandardCharsets.UTF_8) == probeFor
114+
}.getOrElse(false)
86115
}
87116

88-
def createIndexes(cpg: Cpg): Unit =
89-
cpg.graph.indexManager.createNodePropertyIndex(PropertyNames.FULL_NAME)
90-
91117
}

codepropertygraph/src/main/scala/io/shiftleft/codepropertygraph/cpgloading/CpgLoaderConfig.scala

-50
This file was deleted.

codepropertygraph/src/main/scala/io/shiftleft/codepropertygraph/cpgloading/NodeFilter.scala

-25
This file was deleted.

0 commit comments

Comments
 (0)