1 /***********************************************************************
2  * Copyright (c) 2013-2024 Commonwealth Computer Research, Inc.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Apache License, Version 2.0
5  * which accompanies this distribution and is available at
6  * http://www.opensource.org/licenses/apache2.0.php.
7  ***********************************************************************/
8 
9 package org.locationtech.geomesa.utils.io
10 
11 import com.typesafe.scalalogging.LazyLogging
12 import org.apache.commons.io.FilenameUtils
13 import org.locationtech.geomesa.utils.io.fs.FileSystemDelegate.FileHandle
14 import org.locationtech.geomesa.utils.io.fs.{FileSystemDelegate, LocalDelegate}
15 
16 import java.io._
17 import java.net.URL
18 import java.nio.file._
19 import java.nio.file.attribute.BasicFileAttributes
20 import java.util.regex.Pattern
21 import scala.util.Try
22 
/**
  * Path utilities that dispatch to either a local or a hadoop-backed file system delegate, plus
  * helpers for working with file extensions, compressed streams and recursive deletion
  */
object PathUtils extends FileSystemDelegate with LazyLogging {

  // matches any path that starts with a scheme, e.g. 'hdfs://...' or 'file://...'
  private val uriRegex = Pattern.compile("""\w+://.*""")

  // scheme prefix identifying a URI that points at the local file system
  private val fileScheme = "file://"

  private val localDelegate = new LocalDelegate()

  // delegate allows us to avoid a runtime dependency on hadoop
  // note: Throwable is caught on purpose - a missing optional dependency may surface as a LinkageError
  // (e.g. NoClassDefFoundError), which scala.util.control.NonFatal (and thus scala.util.Try) would not catch
  private val hadoopDelegate: Option[FileSystemDelegate] =
    try {
      val clas = Class.forName("org.locationtech.geomesa.utils.hadoop.HadoopDelegate")
      Some(clas.getDeclaredConstructor().newInstance().asInstanceOf[FileSystemDelegate])
    } catch {
      case _: Throwable => None
    }

  override def interpretPath(path: String): Seq[FileHandle] = chooseDelegate(path).interpretPath(path)

  override def getHandle(path: String): FileHandle = chooseDelegate(path).getHandle(path)

  override def getUrl(path: String): URL = chooseDelegate(path).getUrl(path)

  /**
    * Checks to see if the path uses a URL pattern and then if it is *not* file://
    *
    * The scheme comparison is case-insensitive and does not depend on the JVM default locale
    * (a plain `toLowerCase` would turn 'FILE' into 'fıle' under e.g. a Turkish locale).
    *
    * @param path Input resource path
    * @return     Whether or not the resource is remote.
    */
  def isRemote(path: String): Boolean = {
    val ignoreCase = true
    uriRegex.matcher(path).matches() &&
        !path.regionMatches(ignoreCase, 0, fileScheme, 0, fileScheme.length)
  }

  /**
    * Returns the file extension, minus any compression that may be present
    *
    * @param path file path
    * @return the extension of the path after any compression suffix has been removed by
    *         `CompressionUtils.getUncompressedFilename`
    */
  def getUncompressedExtension(path: String): String =
    FilenameUtils.getExtension(CompressionUtils.getUncompressedFilename(path))

  /**
    * Gets the base file name and the extension. Useful for adding unique ids to a common file name,
    * while preserving the extension
    *
    * For compressed file names with a double extension (like 'foo.tar.gz'), both parts are returned
    * as the extension. If the path has no extension, the extension is the empty string.
    *
    * @param path path
    * @param includeDot if true, the '.' will be preserved in the extension, otherwise it will be dropped
    * @return (base name including path prefix, extension)
    */
  def getBaseNameAndExtension(path: String, includeDot: Boolean = true): (String, String) = {
    // start of the extension substring: on the '.' itself, or just past it
    def dotIndex(base: Int): Int = if (includeDot) { base } else { base + 1 }
    val split = FilenameUtils.indexOfExtension(path)
    if (split == -1) { (path, "") } else {
      val withoutExtension = path.substring(0, split)
      // look for file names like 'foo.tar.gz'
      val secondSplit = FilenameUtils.indexOfExtension(withoutExtension)
      if (secondSplit != -1 && CompressionUtils.isCompressedFilename(path)) {
        (path.substring(0, secondSplit), path.substring(dotIndex(secondSplit)))
      } else {
        (withoutExtension, path.substring(dotIndex(split)))
      }
    }
  }

  /**
    * Wrap the input stream in a decompressor, if the file is compressed
    *
    * The stream is always buffered first. The wrapper is provided by the first entry of
    * `CompressionUtils.Utils` that recognizes the file name; if none does, the buffered stream
    * is returned as-is.
    *
    * @param is input stream
    * @param filename filename (used to determine compression)
    * @return the buffered stream, wrapped according to the file name if it is recognized as compressed
    */
  def handleCompression(is: InputStream, filename: String): InputStream = {
    val buffered = new BufferedInputStream(is)
    CompressionUtils.Utils.find(_.isCompressedFilename(filename)) match {
      case None => buffered
      case Some(utils) => utils.compress(buffered)
    }
  }

  /**
    * Delete a path, including all children
    *
    * Entries that can't be visited are skipped (see `DeleteFileVisitor`), while a failed delete
    * propagates as an exception from the underlying `Files.delete` call.
    *
    * @param path path
    */
  def deleteRecursively(path: Path): Unit = Files.walkFileTree(path, new DeleteFileVisitor)

  /**
    * Picks the delegate for a path: the hadoop delegate if it could be loaded and the path looks
    * like a URI, otherwise the local delegate
    *
    * @param path path
    * @return delegate used to handle the path
    */
  private def chooseDelegate(path: String): FileSystemDelegate =
    hadoopDelegate match {
      case Some(hadoop) if uriRegex.matcher(path).matches() => hadoop
      case _ => localDelegate
    }

  /**
    * File visitor to delete nested paths
    *
    * Files are deleted as they are visited, directories are deleted after their children have
    * been visited.
    */
  class DeleteFileVisitor extends FileVisitor[Path] {

    // best-effort: an entry that can't be visited is ignored and the walk continues
    override def visitFileFailed(file: Path, exc: IOException): FileVisitResult = FileVisitResult.CONTINUE

    override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
      // directories are removed in postVisitDirectory, once they have been emptied
      if (!attrs.isDirectory) {
        Files.delete(file)
      }
      FileVisitResult.CONTINUE
    }

    override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = FileVisitResult.CONTINUE

    // note: any error reported while iterating the directory (exc) is ignored, the delete is attempted regardless
    override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = {
      Files.delete(dir)
      FileVisitResult.CONTINUE
    }
  }
}
Line Stmt Id Pos Tree Symbol Tests Code
25 10863 1005 - 1036 Apply java.util.regex.Pattern.compile java.util.regex.Pattern.compile("\\w+://.*")
27 10864 1068 - 1087 Apply org.locationtech.geomesa.utils.io.fs.LocalDelegate.<init> new org.locationtech.geomesa.utils.io.fs.LocalDelegate()
32 10865 1234 - 1288 Literal <nosymbol> "org.locationtech.geomesa.utils.hadoop.HadoopDelegate"
33 10866 1220 - 1370 TypeApply scala.Any.asInstanceOf java.lang.Class.forName("org.locationtech.geomesa.utils.hadoop.HadoopDelegate").getDeclaredConstructor().newInstance().asInstanceOf[org.locationtech.geomesa.utils.io.fs.FileSystemDelegate]
33 10867 1220 - 1370 Block scala.Any.asInstanceOf java.lang.Class.forName("org.locationtech.geomesa.utils.hadoop.HadoopDelegate").getDeclaredConstructor().newInstance().asInstanceOf[org.locationtech.geomesa.utils.io.fs.FileSystemDelegate]
35 10868 1412 - 1416 Literal <nosymbol> null
35 10869 1412 - 1416 Block <nosymbol> null
38 10870 1486 - 1526 Apply org.locationtech.geomesa.utils.io.fs.FileSystemDelegate.interpretPath PathUtils.this.chooseDelegate(path).interpretPath(path)
40 10871 1581 - 1617 Apply org.locationtech.geomesa.utils.io.fs.FileSystemDelegate.getHandle PathUtils.this.chooseDelegate(path).getHandle(path)
42 10872 1662 - 1695 Apply org.locationtech.geomesa.utils.io.fs.FileSystemDelegate.getUrl PathUtils.this.chooseDelegate(path).getUrl(path)
51 10873 2003 - 2012 Literal <nosymbol> "file://"
51 10874 1974 - 2013 Select scala.Boolean.unary_! path.toLowerCase().startsWith("file://").unary_!
51 10875 1938 - 2013 Apply scala.Boolean.&& PathUtils.this.uriRegex.matcher(path).matches().&&(path.toLowerCase().startsWith("file://").unary_!)
60 10876 2238 - 2284 Apply org.locationtech.geomesa.utils.io.CompressionUtils.getUncompressedFilename CompressionUtils.getUncompressedFilename(path)
60 10877 2211 - 2285 Apply org.apache.commons.io.FilenameUtils.getExtension org.apache.commons.io.FilenameUtils.getExtension(CompressionUtils.getUncompressedFilename(path))
71 10878 2779 - 2783 Ident org.locationtech.geomesa.utils.io.PathUtils.base base
71 10879 2793 - 2801 Apply scala.Int.+ base.+(1)
71 10880 2793 - 2801 Block scala.Int.+ base.+(1)
72 10881 2819 - 2855 Apply org.apache.commons.io.FilenameUtils.indexOfExtension org.apache.commons.io.FilenameUtils.indexOfExtension(path)
73 10882 2864 - 2875 Apply scala.Int.== split.==(-1)
73 10883 2879 - 2889 Apply scala.Tuple2.apply scala.Tuple2.apply[String, String](path, "")
73 10884 2879 - 2889 Block scala.Tuple2.apply scala.Tuple2.apply[String, String](path, "")
73 10899 2897 - 3319 Block <nosymbol> { val withoutExtension: String = path.substring(0, split); val secondSplit: Int = org.apache.commons.io.FilenameUtils.indexOfExtension(withoutExtension); if (secondSplit.!=(-1).&&(CompressionUtils.isCompressedFilename(path))) scala.Tuple2.apply[String, String](path.substring(0, secondSplit), path.substring(dotIndex(secondSplit))) else scala.Tuple2.apply[String, String](withoutExtension, path.substring(dotIndex(split))) }
74 10885 2928 - 2952 Apply java.lang.String.substring path.substring(0, split)
76 10886 3024 - 3072 Apply org.apache.commons.io.FilenameUtils.indexOfExtension org.apache.commons.io.FilenameUtils.indexOfExtension(withoutExtension)
77 10887 3098 - 3100 Literal <nosymbol> -1
77 10888 3104 - 3147 Apply org.locationtech.geomesa.utils.io.CompressionUtils.isCompressedFilename CompressionUtils.isCompressedFilename(path)
77 10889 3083 - 3147 Apply scala.Boolean.&& secondSplit.!=(-1).&&(CompressionUtils.isCompressedFilename(path))
78 10890 3160 - 3190 Apply java.lang.String.substring path.substring(0, secondSplit)
78 10891 3207 - 3228 Apply org.locationtech.geomesa.utils.io.PathUtils.dotIndex dotIndex(secondSplit)
78 10892 3192 - 3229 Apply java.lang.String.substring path.substring(dotIndex(secondSplit))
78 10893 3159 - 3230 Apply scala.Tuple2.apply scala.Tuple2.apply[String, String](path.substring(0, secondSplit), path.substring(dotIndex(secondSplit)))
78 10894 3159 - 3230 Block scala.Tuple2.apply scala.Tuple2.apply[String, String](path.substring(0, secondSplit), path.substring(dotIndex(secondSplit)))
80 10895 3288 - 3303 Apply org.locationtech.geomesa.utils.io.PathUtils.dotIndex dotIndex(split)
80 10896 3273 - 3304 Apply java.lang.String.substring path.substring(dotIndex(split))
80 10897 3254 - 3305 Apply scala.Tuple2.apply scala.Tuple2.apply[String, String](withoutExtension, path.substring(dotIndex(split)))
80 10898 3254 - 3305 Block scala.Tuple2.apply scala.Tuple2.apply[String, String](withoutExtension, path.substring(dotIndex(split)))
93 10900 3618 - 3645 Apply java.io.BufferedInputStream.<init> new java.io.BufferedInputStream(is)
94 10901 3678 - 3710 Apply org.locationtech.geomesa.utils.io.CompressionUtils.isCompressedFilename x$1.isCompressedFilename(filename)
94 10902 3650 - 3711 Apply scala.collection.IterableLike.find CompressionUtils.Utils.find(((x$1: org.locationtech.geomesa.utils.io.CompressionUtils) => x$1.isCompressedFilename(filename)))
95 10903 3739 - 3747 Ident org.locationtech.geomesa.utils.io.PathUtils.buffered buffered
96 10904 3774 - 3798 Apply org.locationtech.geomesa.utils.io.CompressionUtils.compress utils.compress(buffered)
96 10905 3774 - 3798 Block org.locationtech.geomesa.utils.io.CompressionUtils.compress utils.compress(buffered)
105 10906 3965 - 3986 Apply org.locationtech.geomesa.utils.io.PathUtils.DeleteFileVisitor.<init> new PathUtils.this.DeleteFileVisitor()
105 10907 3940 - 3987 Apply java.nio.file.Files.walkFileTree java.nio.file.Files.walkFileTree(path, new PathUtils.this.DeleteFileVisitor())
105 10908 3958 - 3958 Literal <nosymbol> ()
108 10909 4080 - 4084 Literal <nosymbol> null
108 10910 4088 - 4120 Apply java.util.regex.Matcher.matches PathUtils.this.uriRegex.matcher(path).matches()
108 10911 4062 - 4120 Apply scala.Boolean.&& PathUtils.this.hadoopDelegate.!=(null).&&(PathUtils.this.uriRegex.matcher(path).matches())
108 10912 4124 - 4138 Select org.locationtech.geomesa.utils.io.PathUtils.hadoopDelegate PathUtils.this.hadoopDelegate
108 10913 4124 - 4138 Block org.locationtech.geomesa.utils.io.PathUtils.hadoopDelegate PathUtils.this.hadoopDelegate
108 10914 4148 - 4161 Select org.locationtech.geomesa.utils.io.PathUtils.localDelegate PathUtils.this.localDelegate
108 10915 4148 - 4161 Block org.locationtech.geomesa.utils.io.PathUtils.localDelegate PathUtils.this.localDelegate
115 10916 4357 - 4381 Literal <nosymbol> CONTINUE
118 10917 4481 - 4499 Select scala.Boolean.unary_! attrs.isDirectory().unary_!
118 10920 4477 - 4477 Literal <nosymbol> ()
118 10921 4477 - 4477 Block <nosymbol> ()
119 10918 4511 - 4529 Apply java.nio.file.Files.delete java.nio.file.Files.delete(file)
119 10919 4511 - 4529 Block java.nio.file.Files.delete java.nio.file.Files.delete(file)
121 10922 4544 - 4568 Literal <nosymbol> CONTINUE
124 10923 4669 - 4693 Literal <nosymbol> CONTINUE
127 10924 4787 - 4804 Apply java.nio.file.Files.delete java.nio.file.Files.delete(dir)
128 10925 4811 - 4835 Literal <nosymbol> CONTINUE