Skip to content

Commit 0a8510c

Browse files
committed
perf: improve the rename performance.
Signed-off-by: Yang Yu <[email protected]>
1 parent 7dfdb92 commit 0a8510c

File tree

7 files changed

+199
-87
lines changed

7 files changed

+199
-87
lines changed

pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>com.qcloud.cos</groupId>
88
<artifactId>hadoop-cos</artifactId>
9-
<version>8.3.19</version>
9+
<version>8.3.20</version>
1010
<packaging>jar</packaging>
1111

1212
<name>Apache Hadoop Tencent Cloud COS Support</name>

src/main/java/org/apache/hadoop/fs/CosNFileStatus.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
import org.apache.hadoop.fs.permission.FsPermission;
44

55
import javax.annotation.Nullable;
6+
import java.util.Map;
67

78
public class CosNFileStatus extends FileStatus {
89
private final String ETag;
910
private final String crc64ecma;
1011
private final String crc32cm;
1112
private final String storageClass;
1213
private final String versionId;
14+
private final Map<String, byte[]> userAttributes;
1315

1416
public CosNFileStatus(long length, boolean isdir, int block_replication, long blocksize, long modification_time,
1517
long access_time, FsPermission permission, String owner, String group, Path path) {
@@ -21,19 +23,20 @@ public CosNFileStatus(long length, boolean isdir, int block_replication, long bl
2123
long access_time, FsPermission permission, String owner, String group, Path path,
2224
String ETag) {
2325
this(length, isdir, block_replication, blocksize, modification_time, access_time, permission, owner, group,
24-
path, ETag, null, null, null, null);
26+
path, ETag, null, null, null, null, null);
2527
}
2628

2729
public CosNFileStatus(long length, boolean isdir, int block_replication, long blocksize, long modification_time,
2830
long access_time, FsPermission permission, String owner, String group, Path path,
29-
String ETag, String crc64ecma, String crc32cm, String versionId, String storageClass) {
31+
String ETag, String crc64ecma, String crc32cm, String versionId, String storageClass, Map<String, byte[]> userAttributes) {
3032
super(length, isdir, block_replication, blocksize, modification_time, access_time, permission, owner, group,
3133
path);
3234
this.ETag = ETag;
3335
this.crc64ecma = crc64ecma;
3436
this.crc32cm = crc32cm;
3537
this.storageClass = storageClass;
3638
this.versionId = versionId;
39+
this.userAttributes = userAttributes;
3740
}
3841

3942
public String getETag() {
@@ -59,4 +62,8 @@ public String getVersionId() {
5962
return versionId;
6063
}
6164

65+
@Nullable
66+
public Map<String, byte[]> getUserAttributes() {
67+
return userAttributes;
68+
}
6269
}

src/main/java/org/apache/hadoop/fs/CosNFileSystem.java

+114-79
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.apache.hadoop.fs.cosn.OperationCancellingStatusProvider;
1414
import org.apache.hadoop.fs.cosn.ReadBufferHolder;
1515
import org.apache.hadoop.fs.cosn.Unit;
16+
import org.apache.hadoop.fs.cosn.common.Pair;
1617
import org.apache.hadoop.fs.permission.FsPermission;
1718
import org.apache.hadoop.security.AccessControlException;
1819
import org.apache.hadoop.security.UserGroupInformation;
@@ -739,7 +740,7 @@ public FileStatus[] listStatus(Path f) throws IOException {
739740
listMaxLength = CosNFileSystem.POSIX_BUCKET_LIST_LIMIT;
740741
}
741742

742-
if (key.length() > 0) {
743+
if (!key.isEmpty()) {
743744
FileStatus fileStatus = this.getFileStatus(f);
744745
if (fileStatus.isFile() || fileStatus.isSymlink()) {
745746
return new FileStatus[]{fileStatus};
@@ -810,12 +811,14 @@ public FileStatus[] listStatus(Path f) throws IOException {
810811
return status.toArray(new FileStatus[status.size()]);
811812
}
812813

814+
815+
813816
private FileStatus newFile(FileMetadata meta, Path path) {
814817
return new CosNFileStatus(meta.getLength(), false, 1, getDefaultBlockSize(),
815818
meta.getLastModified(), 0, null, this.owner, this.group,
816819
path.makeQualified(this.getUri(), this.getWorkingDirectory()),
817820
meta.getETag(), meta.getCrc64ecm(), meta.getCrc32cm(),
818-
meta.getVersionId(), meta.getStorageClass());
821+
meta.getVersionId(), meta.getStorageClass(), meta.getUserAttributes());
819822
}
820823

821824
private FileStatus newDirectory(Path path) {
@@ -838,7 +841,7 @@ private FileStatus newDirectory(FileMetadata meta, Path path) {
838841
meta.getLastModified(), 0, null, this.owner, this.group,
839842
path.makeQualified(this.getUri(), this.getWorkingDirectory()),
840843
meta.getETag(), meta.getCrc64ecm(), meta.getCrc32cm(),
841-
meta.getVersionId(), meta.getStorageClass());
844+
meta.getVersionId(), meta.getStorageClass(), meta.getUserAttributes());
842845
}
843846

844847
/**
@@ -980,103 +983,133 @@ public FSDataInputStream open(Path f, int bufferSize) throws IOException {
980983

981984
@Override
982985
public boolean rename(Path src, Path dst) throws IOException {
983-
986+
Preconditions.checkNotNull(src);
987+
Preconditions.checkNotNull(dst);
984988
// Renaming the root directory is not allowed
985989
if (src.isRoot()) {
986990
LOG.debug("Cannot rename the root directory of a filesystem.");
987991
return false;
988992
}
989993

990-
// check the source path whether exists or not, if not return false.
991-
FileStatus srcFileStatus;
992-
try {
993-
srcFileStatus = this.getFileStatus(src);
994-
} catch (FileNotFoundException e) {
995-
LOG.debug("The source path [{}] is not exist.", src);
994+
// the preconditions for the rename operation.
995+
// reference: https://hadoop.apache.org/docs/r3.3.0/hadoop-project-dist/hadoop-common/filesystem/filesystem.html#rename
996+
Pair<CosNFileStatus, CosNFileStatus> renameFileStatusPair = renameInitiate(src, dst);
997+
998+
// the postconditions for the rename operation.
999+
// reference: https://hadoop.apache.org/docs/r3.3.0/hadoop-project-dist/hadoop-common/filesystem/filesystem.html#rename
1000+
if (src.equals(dst)) {
1001+
if (renameFileStatusPair.getFirst() != null) {
1002+
if (renameFileStatusPair.getFirst().isDirectory()) {
1003+
//Renaming a directory onto itself is no-op; return value is not specified.
1004+
//In POSIX the result is False; in HDFS the result is True.
1005+
return true;
1006+
}
1007+
if (renameFileStatusPair.getFirst().isFile()) {
1008+
// Renaming a file to itself is a no-op; the result is True.
1009+
return true;
1010+
}
1011+
// For symlinks, the Hadoop file system specification gives no clear guidance.
1012+
// A test with a symbolic link on a POSIX file system showed the same behavior, so true is returned here as well.
1013+
return true;
1014+
}
9961015
return false;
9971016
}
9981017

999-
// Source path and destination path are not allowed to be the same
1000-
if (src.equals(dst)) {
1001-
LOG.debug("The source path and the dest path refer to the same file or " +
1002-
"directory: {}", dst);
1003-
throw new IOException("the source path and dest path refer to the " +
1004-
"same file or directory");
1018+
if (!isPosixBucket) {
1019+
return internalCopyAndDelete(
1020+
src, renameFileStatusPair.getFirst(),
1021+
dst, renameFileStatusPair.getSecond());
1022+
} else {
1023+
return internalRename(src, dst);
10051024
}
1025+
}
1026+
1027+
private Pair<CosNFileStatus, CosNFileStatus> renameInitiate(Path srcPath, Path dstPath)
1028+
throws PathIOException, IOException {
1029+
// Preconditions
1030+
Preconditions.checkNotNull(srcPath);
1031+
Preconditions.checkNotNull(dstPath);
1032+
Preconditions.checkArgument(srcPath.isAbsolute());
1033+
Preconditions.checkArgument(dstPath.isAbsolute());
1034+
1035+
Pair<CosNFileStatus, CosNFileStatus> renameFileStatusPair = new Pair<>();
1036+
1037+
// Hadoop FileSystem Specification: if not exists(FS, src) : raise FileNotFoundException
1038+
CosNFileStatus srcFileStatus = null;
1039+
try {
1040+
srcFileStatus = (CosNFileStatus) this.getFileStatus(srcPath);
1041+
} catch (FileNotFoundException e) {
1042+
LOG.error("The source path [{}] is not exist.", srcPath);
1043+
throw e;
1044+
}
1045+
renameFileStatusPair.setFirst(srcFileStatus);
10061046

1007-
// It is not allowed to rename a parent directory to its subdirectory
1008-
Path dstParentPath;
1009-
dstParentPath = dst.getParent();
1010-
while (null != dstParentPath && !src.equals(dstParentPath)) {
1047+
// Hadoop FileSystem Specification: if isDescendant(FS, src, dest) : raise IOException
1048+
Path dstParentPath = dstPath.getParent();
1049+
while (null != dstParentPath && !srcPath.equals(dstParentPath)) {
10111050
dstParentPath = dstParentPath.getParent();
10121051
}
10131052
if (null != dstParentPath) {
1014-
LOG.debug("It is not allowed to rename a parent directory:{} to " +
1015-
"its subdirectory:{}.", src, dst);
1016-
throw new PathIOException(String.format(
1017-
"It is not allowed to rename a parent directory:%s to its" +
1018-
" subdirectory:%s",
1019-
src, dst));
1053+
LOG.error("It is not allowed to rename a parent directory:{} to its subdirectory:{}.", srcPath, dstPath);
1054+
PathIOException pathIOException = new PathIOException(srcPath.toString(),
1055+
"It is not allowed to rename a parent directory to its subdirectory");
1056+
pathIOException.setOperation("rename");
1057+
pathIOException.setTargetPath(dstPath.toString());
1058+
throw pathIOException;
10201059
}
10211060

1022-
FileStatus dstFileStatus = null;
1061+
// Hadoop FileSystem Specification: isRoot(FS, dest) or exists(FS, parent(dest))
1062+
CosNFileStatus dstFileStatus = null;
10231063
try {
1024-
dstFileStatus = this.getFileStatus(dst);
1025-
1026-
// The destination path exists and is a file,
1027-
// and the rename operation is not allowed.
1028-
//
1064+
dstFileStatus = (CosNFileStatus) this.getFileStatus(dstPath);
10291065
if (dstFileStatus.isFile()) {
1030-
LOG.debug("File: {} already exists.", dstFileStatus.getPath());
1031-
return false;
1066+
throw new FileAlreadyExistsException(dstPath.toString());
10321067
} else {
10331068
// The destination path is an existing directory,
1034-
// and it is checked whether there is a file or directory
1035-
// with the same name as the source path under the
1036-
// destination path
1037-
dst = new Path(dst, src.getName());
1038-
FileStatus[] statuses;
1069+
Path tempDstPath = new Path(dstPath, srcPath.getName());
10391070
try {
1040-
statuses = this.listStatus(dst);
1041-
} catch (FileNotFoundException e) {
1042-
statuses = null;
1043-
}
1044-
if (null != statuses && statuses.length > 0) {
1045-
LOG.debug("Cannot rename {} to {}, the destination directory is non-empty.",
1046-
src, dst);
1047-
return false;
1071+
// FileStatus tempDstFileStatus = this.getFileStatus(tempDstPath);
1072+
// if (tempDstFileStatus.isDirectory()) {
1073+
// throw new FileAlreadyExistsException(tempDstPath.toString());
1074+
// }
1075+
FileStatus[] fileStatuses = this.listStatus(tempDstPath);
1076+
if (null != fileStatuses && fileStatuses.length > 0) {
1077+
throw new FileAlreadyExistsException(tempDstPath.toString());
1078+
}
1079+
} catch (FileNotFoundException ignore) {
1080+
// OK, expects Not Found.
10481081
}
10491082
}
1083+
renameFileStatusPair.setSecond(dstFileStatus);
10501084
} catch (FileNotFoundException e) {
1051-
// destination path not exists
1052-
Path tempDstParentPath = dst.getParent();
1085+
// Hadoop FileSystem Specification: if isFile(FS, parent(dest)) : raise IOException
1086+
Path tempDstParentPath = dstPath.getParent();
10531087
FileStatus dstParentStatus = this.getFileStatus(tempDstParentPath);
10541088
if (!dstParentStatus.isDirectory()) {
1055-
throw new IOException(String.format(
1056-
"Cannot rename %s to %s, %s is a file", src, dst, dst.getParent()
1057-
));
1089+
PathIOException pathIOException = new PathIOException(tempDstParentPath.toString(),
1090+
String.format("Can not rename into a file [%s]", tempDstParentPath));
1091+
pathIOException.setTargetPath(dstPath.toString());
1092+
throw pathIOException;
10581093
}
1059-
// The default root directory is definitely there.
10601094
}
10611095

1062-
if (!isPosixBucket) {
1063-
return internalCopyAndDelete(src, dst, srcFileStatus.isDirectory(),
1064-
srcFileStatus.isSymlink());
1065-
} else {
1066-
return internalRename(src, dst);
1067-
}
1096+
return renameFileStatusPair;
10681097
}
10691098

1070-
private boolean internalCopyAndDelete(Path srcPath, Path dstPath,
1071-
boolean isDir, boolean isSymlink) throws IOException {
1072-
boolean result = false;
1073-
if (isDir) {
1074-
result = this.copyDirectory(srcPath, dstPath);
1099+
private boolean internalCopyAndDelete(Path srcPath, CosNFileStatus srcFileStatus,
1100+
Path destPath, CosNFileStatus destFileStatus) throws IOException {
1101+
Preconditions.checkNotNull(srcPath);
1102+
Preconditions.checkNotNull(srcFileStatus);
1103+
boolean result;
1104+
if (srcFileStatus.isDirectory()) {
1105+
result = this.copyDirectory(
1106+
srcPath, srcFileStatus,
1107+
destPath, destFileStatus);
10751108
} else {
1076-
if (isSymlink) {
1077-
result = this.copySymlink(srcPath, dstPath);
1109+
if (srcFileStatus.isSymlink()) {
1110+
result = this.copySymlink(srcPath, destPath);
10781111
} else {
1079-
result = this.copyFile(srcPath, dstPath);
1112+
result = this.copyFile(srcPath, destPath);
10801113
}
10811114
}
10821115

@@ -1106,26 +1139,28 @@ private boolean copySymlink(Path srcSymlink, Path dstSymlink) throws IOException
11061139
return true;
11071140
}
11081141

1109-
private boolean copyDirectory(Path srcPath, Path dstPath) throws IOException {
1142+
private boolean copyDirectory(Path srcPath, CosNFileStatus srcFileStatus,
1143+
Path destPath, CosNFileStatus destFileStatus) throws IOException {
11101144
String srcKey = pathToKey(srcPath);
11111145
if (!srcKey.endsWith(PATH_DELIMITER)) {
11121146
srcKey += PATH_DELIMITER;
11131147
}
1114-
String dstKey = pathToKey(dstPath);
1115-
if (!dstKey.endsWith(PATH_DELIMITER)) {
1116-
dstKey += PATH_DELIMITER;
1148+
String destKey = pathToKey(destPath);
1149+
if (!destKey.endsWith(PATH_DELIMITER)) {
1150+
destKey += PATH_DELIMITER;
11171151
}
11181152

1119-
if (dstKey.startsWith(srcKey)) {
1120-
throw new IOException("can not copy a directory to a subdirectory" +
1121-
" of self");
1153+
if (destKey.startsWith(srcKey)) {
1154+
throw new IOException("can not copy a directory to a subdirectory of self");
11221155
}
11231156
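// This method is called for normal buckets, which strictly distinguish file objects from directory objects. Here srcKey carries the trailing delimiter; using retrieveMetadata
11241157
// could hide the fact that the directory object does not exist, which would cause a 404 error when srcKey is copied later.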
1125-
if (this.nativeStore.queryObjectMetadata(srcKey) == null) {
1158+
if (srcFileStatus.getETag() == null) {
1159+
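// In fact, it would also be correct to PUT an empty srcKey object here regardless of whether one already exists.
1160+
// The srcFileStatus.getETag() == null check only avoids a redundant PUT when a directory object, rather than a mere prefix, is known to exist.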
11261161
this.nativeStore.storeEmptyFile(srcKey);
11271162
} else {
1128-
this.nativeStore.copy(srcKey, dstKey);
1163+
this.nativeStore.copy(srcKey, FileMetadata.fromCosNFileStatus(srcFileStatus), destKey);
11291164
}
11301165

11311166
CosNCopyFileContext copyFileContext = new CosNCopyFileContext();
@@ -1139,7 +1174,7 @@ private boolean copyDirectory(Path srcPath, Path dstPath) throws IOException {
11391174
checkPermission(new Path(file.getKey()), RangerAccessType.DELETE);
11401175
this.boundedCopyThreadPool.execute(new CosNCopyFileTask(
11411176
this.nativeStore,
1142-
file.getKey(), dstKey.concat(file.getKey().substring(srcKey.length())),
1177+
file.getKey(), destKey.concat(file.getKey().substring(srcKey.length())),
11431178
copyFileContext));
11441179
copiesToFinishes++;
11451180
}
@@ -1152,9 +1187,9 @@ private boolean copyDirectory(Path srcPath, Path dstPath) throws IOException {
11521187
if (this.operationCancellingStatusProviderThreadLocal.get() != null
11531188
&& this.operationCancellingStatusProviderThreadLocal.get().isCancelled()) {
11541189
LOG.warn("The copy operation is cancelled. Stop copying the directory. srcKey: {}, dstKey: {}",
1155-
srcKey, dstKey);
1190+
srcKey, destKey);
11561191
throw new IOException(String.format("The copy operation is cancelled. srcKey: %s, dstKey: %s",
1157-
srcKey, dstKey));
1192+
srcKey, destKey));
11581193
}
11591194
} while (null != priorLastKey && !Thread.currentThread().isInterrupted());
11601195

src/main/java/org/apache/hadoop/fs/CosNativeFileSystemStore.java

+11-4
Original file line numberDiff line numberDiff line change
@@ -1499,18 +1499,25 @@ public void deleteRecursive(String key) throws IOException {
14991499

15001500
@Override
15011501
public void copy(String srcKey, String dstKey) throws IOException {
1502+
copy(srcKey, null , dstKey);
1503+
}
1504+
1505+
@Override
1506+
public void copy(String srcKey, FileMetadata srcFileMetadata, String dstKey) throws IOException {
15021507
try {
1503-
FileMetadata sourceFileMetadata = this.retrieveMetadata(srcKey);
1504-
ObjectMetadata objectMetadata = getClientSideEncryptionHeader(sourceFileMetadata);
1508+
if (srcFileMetadata == null) {
1509+
srcFileMetadata = this.retrieveMetadata(srcKey);
1510+
}
1511+
ObjectMetadata objectMetadata = getClientSideEncryptionHeader(srcFileMetadata);
15051512
if (crc32cEnabled) {
15061513
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
15071514
}
15081515

15091516
CopyObjectRequest copyObjectRequest =
15101517
new CopyObjectRequest(bucketName, srcKey, bucketName, dstKey);
15111518
// If sourceFileMetadata is null, the file may be a symbolic link; copying is still supported in that case.
1512-
if (null != sourceFileMetadata && null != sourceFileMetadata.getStorageClass()) {
1513-
copyObjectRequest.setStorageClass(sourceFileMetadata.getStorageClass());
1519+
if (null != srcFileMetadata.getStorageClass()) {
1520+
copyObjectRequest.setStorageClass(srcFileMetadata.getStorageClass());
15141521
}
15151522
copyObjectRequest.setNewObjectMetadata(objectMetadata);
15161523
this.setEncryptionMetadata(copyObjectRequest, objectMetadata);

src/main/java/org/apache/hadoop/fs/FileMetadata.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ static FileMetadata fromCosNFileStatus(CosNFileStatus fileStatus) {
2828
String key = CosNUtils.pathToKey(fileStatus.getPath());
2929
return new FileMetadata(key, fileStatus.getLen(), fileStatus.getModificationTime(),
3030
fileStatus.isFile(), fileStatus.getETag(), fileStatus.getCrc64ecma(), fileStatus.getCrc32cm(),
31-
fileStatus.getVersionId(), fileStatus.getStorageClass(), null);
31+
fileStatus.getVersionId(), fileStatus.getStorageClass(), fileStatus.getUserAttributes());
3232
}
3333

3434
public FileMetadata(String key, long length, long lastModified) {

0 commit comments

Comments
 (0)