You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ma...@apache.org on 2019/12/11 08:10:38 UTC
[cassandra] branch cassandra-3.0 updated: Avoid over-scanning data
directories in LogFile.verify()
This is an automated email from the ASF dual-hosted git repository.
marcuse pushed a commit to branch cassandra-3.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-3.0 by this push:
new af963e3 Avoid over-scanning data directories in LogFile.verify()
af963e3 is described below
commit af963e3a7318aad177bc1b985478d4a598b51d9c
Author: Marcus Eriksson <ma...@apache.org>
AuthorDate: Thu Oct 17 10:20:24 2019 +0200
Avoid over-scanning data directories in LogFile.verify()
Patch by marcuse; reviewed by David Capwell and Jordan West for CASSANDRA-15364
---
CHANGES.txt | 1 +
.../org/apache/cassandra/db/lifecycle/LogFile.java | 71 ++++++++++++++--------
.../apache/cassandra/db/lifecycle/LogRecord.java | 48 +++++++--------
.../cassandra/db/lifecycle/LogTransaction.java | 4 +-
4 files changed, 71 insertions(+), 53 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index fe3702c..f47bfa8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.0.20
+ * Avoid over-scanning data directories in LogFile.verify() (CASSANDRA-15364)
* Bump generations and document changes to system_distributed and system_traces in 3.0, 3.11
(CASSANDRA-15441)
* Fix system_traces creation timestamp; optimise system keyspace upgrades (CASSANDRA-15398)
diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogFile.java b/src/java/org/apache/cassandra/db/lifecycle/LogFile.java
index e9047ad..6e820df 100644
--- a/src/java/org/apache/cassandra/db/lifecycle/LogFile.java
+++ b/src/java/org/apache/cassandra/db/lifecycle/LogFile.java
@@ -156,7 +156,21 @@ final class LogFile implements AutoCloseable
return false;
}
- records.forEach(LogFile::verifyRecord);
+ Set<String> absolutePaths = new HashSet<>();
+ for (LogRecord record : records)
+ record.absolutePath.ifPresent(absolutePaths::add);
+
+ Map<String, List<File>> recordFiles = LogRecord.getExistingFiles(absolutePaths);
+ for (LogRecord record : records)
+ {
+ List<File> existingFiles = Collections.emptyList();
+ if (record.absolutePath.isPresent())
+ {
+ String key = record.absolutePath.get();
+ existingFiles = recordFiles.getOrDefault(key, Collections.emptyList());
+ }
+ LogFile.verifyRecord(record, existingFiles);
+ }
Optional<LogRecord> firstInvalid = records.stream().filter(LogRecord::isInvalidOrPartial).findFirst();
if (!firstInvalid.isPresent())
@@ -195,7 +209,7 @@ final class LogFile implements AutoCloseable
return record;
}
- static void verifyRecord(LogRecord record)
+ static void verifyRecord(LogRecord record, List<File> existingFiles)
{
if (record.checksum != record.computeChecksum())
{
@@ -216,7 +230,7 @@ final class LogFile implements AutoCloseable
// file that obsoleted the very same files. So we check the latest update time and make sure
// it matches. Because we delete files from oldest to newest, the latest update time should
// always match.
- record.status.onDiskRecord = record.withExistingFiles();
+ record.status.onDiskRecord = record.withExistingFiles(existingFiles);
if (record.updateTime != record.status.onDiskRecord.updateTime && record.status.onDiskRecord.updateTime > 0)
{
record.setError(String.format("Unexpected files detected for sstable [%s], " +
@@ -277,9 +291,9 @@ final class LogFile implements AutoCloseable
return committed() || aborted();
}
- void add(Type type, SSTable table)
+ void add(SSTable table)
{
- addRecord(makeRecord(type, table));
+ addRecord(makeAddRecord(table));
}
public void addAll(Type type, Iterable<SSTableReader> toBulkAdd)
@@ -300,13 +314,11 @@ final class LogFile implements AutoCloseable
return LogRecord.make(type, tables);
}
- private LogRecord makeRecord(Type type, SSTable table)
+ private LogRecord makeAddRecord(SSTable table)
{
- assert type == Type.ADD || type == Type.REMOVE;
-
File folder = table.descriptor.directory;
replicas.maybeCreateReplica(folder, getFileName(folder), records);
- return LogRecord.make(type, table);
+ return LogRecord.make(Type.ADD, table);
}
/**
@@ -336,20 +348,15 @@ final class LogFile implements AutoCloseable
throw new IllegalStateException("Failed to add record");
}
- void remove(Type type, SSTable table)
+ void remove(SSTable table)
{
- LogRecord record = makeRecord(type, table);
+ LogRecord record = makeAddRecord(table);
assert records.contains(record) : String.format("[%s] is not tracked by %s", record, id);
-
- deleteRecordFiles(record);
+ assert record.absolutePath.isPresent();
+ deleteRecordFiles(LogRecord.getExistingFiles(record.absolutePath.get()));
records.remove(record);
}
- boolean contains(Type type, SSTable table)
- {
- return contains(makeRecord(type, table));
- }
-
boolean contains(Type type, SSTable sstable, LogRecord record)
{
return contains(makeRecord(type, sstable, record));
@@ -362,21 +369,31 @@ final class LogFile implements AutoCloseable
void deleteFilesForRecordsOfType(Type type)
{
- records.stream()
- .filter(type::matches)
- .forEach(LogFile::deleteRecordFiles);
+ assert type == Type.REMOVE || type == Type.ADD;
+ Set<String> absolutePaths = new HashSet<>();
+ for (LogRecord record : records)
+ {
+ if (type.matches(record))
+ {
+ assert record.absolutePath.isPresent() : "type is either REMOVE or ADD, record should always have an absolutePath: " + record;
+ absolutePaths.add(record.absolutePath.get());
+ }
+ }
+
+ Map<String, List<File>> existingFiles = LogRecord.getExistingFiles(absolutePaths);
+
+ for (List<File> toDelete : existingFiles.values())
+ LogFile.deleteRecordFiles(toDelete);
+
records.clear();
}
- private static void deleteRecordFiles(LogRecord record)
+ private static void deleteRecordFiles(List<File> existingFiles)
{
- List<File> files = record.getExistingFiles();
-
// we sort the files in ascending update time order so that the last update time
// stays the same even if we only partially delete files, see comment in isInvalid()
- files.sort((f1, f2) -> Long.compare(f1.lastModified(), f2.lastModified()));
-
- files.forEach(LogTransaction::delete);
+ existingFiles.sort(Comparator.comparingLong(File::lastModified));
+ existingFiles.forEach(LogTransaction::delete);
}
/**
diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java b/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java
index 1dc17f6..69b4920 100644
--- a/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java
+++ b/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java
@@ -31,7 +31,6 @@ import java.util.stream.Collectors;
import java.util.zip.CRC32;
import org.apache.cassandra.io.sstable.Component;
-import org.apache.cassandra.io.sstable.Descriptor;
import org.apache.cassandra.io.sstable.SSTable;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.io.util.FileUtils;
@@ -183,9 +182,9 @@ final class LogRecord
return FileUtils.getCanonicalPath(baseFilename + Component.separator);
}
- public LogRecord withExistingFiles()
+ public LogRecord withExistingFiles(List<File> existingFiles)
{
- return make(type, getExistingFiles(), 0, absolutePath.get());
+ return make(type, existingFiles, 0, absolutePath.get());
}
public static LogRecord make(Type type, List<File> files, int minFiles, String absolutePath)
@@ -289,12 +288,6 @@ final class LogRecord
checksum);
}
- public List<File> getExistingFiles()
- {
- assert absolutePath.isPresent() : "Expected a path in order to get existing files";
- return getExistingFiles(absolutePath.get());
- }
-
public static List<File> getExistingFiles(String absoluteFilePath)
{
Path path = Paths.get(absoluteFilePath);
@@ -304,34 +297,41 @@ final class LogRecord
}
/**
- * absoluteFilePaths contains full file parts up to the component name
+ * absoluteFilePaths contains full file parts up to (but excluding) the component name
+ *
+ * This method finds all files on disk beginning with any of the paths in absoluteFilePaths
*
- * this method finds all files on disk beginning with any of the paths in absoluteFilePaths
* @return a map from absoluteFilePath to actual file on disk.
*/
public static Map<String, List<File>> getExistingFiles(Set<String> absoluteFilePaths)
{
- Set<File> uniqueDirectories = absoluteFilePaths.stream().map(path -> Paths.get(path).getParent().toFile()).collect(Collectors.toSet());
Map<String, List<File>> fileMap = new HashMap<>();
+ Map<File, TreeSet<String>> dirToFileNamePrefix = new HashMap<>();
+ for (String absolutePath : absoluteFilePaths)
+ {
+ Path fullPath = Paths.get(absolutePath);
+ Path path = fullPath.getParent();
+ if (path != null)
+ dirToFileNamePrefix.computeIfAbsent(path.toFile(), (k) -> new TreeSet<>()).add(fullPath.getFileName().toString());
+ }
+
FilenameFilter ff = (dir, name) -> {
- Descriptor descriptor = null;
- try
+ TreeSet<String> dirSet = dirToFileNamePrefix.get(dir);
+ // if the set contains a prefix of the current file name, the file name we have here should sort directly
+ // after the prefix in the tree set, which means we can use 'floor' to get the prefix (returns the largest
+ // of the smaller strings in the set). Also note that the prefixes always end with '-' which means we won't
+ // have "xy-1111-Data.db".startsWith("xy-11") below (we'd get "xy-1111-Data.db".startsWith("xy-11-"))
+ String baseName = dirSet.floor(name);
+ if (baseName != null && name.startsWith(baseName))
{
- descriptor = Descriptor.fromFilename(dir, name).left;
- }
- catch (Throwable t)
- {// ignored - if we can't parse the filename, just skip the file
- }
-
- String absolutePath = descriptor != null ? absolutePath(descriptor.baseFilename()) : null;
- if (absolutePath != null && absoluteFilePaths.contains(absolutePath))
+ String absolutePath = new File(dir, baseName).getPath();
fileMap.computeIfAbsent(absolutePath, k -> new ArrayList<>()).add(new File(dir, name));
-
+ }
return false;
};
// populate the file map:
- for (File f : uniqueDirectories)
+ for (File f : dirToFileNamePrefix.keySet())
f.listFiles(ff);
return fileMap;
diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
index 9bbf69d..d8fc633 100644
--- a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
+++ b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
@@ -133,7 +133,7 @@ class LogTransaction extends Transactional.AbstractTransactional implements Tran
**/
void trackNew(SSTable table)
{
- txnFile.add(Type.ADD, table);
+ txnFile.add(table);
}
/**
@@ -141,7 +141,7 @@ class LogTransaction extends Transactional.AbstractTransactional implements Tran
*/
void untrackNew(SSTable table)
{
- txnFile.remove(Type.ADD, table);
+ txnFile.remove(table);
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org