You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@jackrabbit.apache.org by Dan Diephouse <da...@mulesource.com> on 2010/06/23 01:36:30 UTC

Removing blobs from filesystem with a DB PersistenceManager

We are running 1.6.x and noticing that blobs are not getting deleted from
the filesystem. We're just setting the data on the node like this:

// Required because our node type inherits from nt:file: a "jcr:content" child
// of type nt:resource carrying the MIME type, last-modified value and data.
Node contentNode = versionNode.addNode("jcr:content", "nt:resource");
contentNode.setProperty("jcr:mimeType", contentType);
contentNode.setProperty("jcr:lastModified", now);
contentNode.setProperty("jcr:data", inputStream);

When the parent node is deleted though, the data blob is left on the
filesystem. Config is attached below.

Ok, now I've seen this:
http://wiki.apache.org/jackrabbit/DataStore#Running_Data_Store_Garbage_Collection_.28Jackrabbit_1.x.29

But I don't have a data store configured; I have a PersistenceManager with
external blobs. I understand that we should probably switch to a data store,
but that isn't an option. So two questions:

1) I can see the delete of the node happening in the Jackrabbit internals.
Why isn't it also deleting the blob?!
2) Is there any way to go through the blobs directory and figure out which
ones aren't referenced any more so we can clean things up for someone who
has already deployed our app?

Thanks,
Dan

<Repository>
  <!--
    Repository-wide virtual file system. This is where the repository keeps
    its global state, e.g. registered namespaces and custom node types.
  -->
  <FileSystem class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
    <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
    <param name="schemaObjectPrefix" value="repo_"/>
  </FileSystem>

  <!-- Security configuration: access manager and login module. -->
  <Security appName="Jackrabbit">
    <!--
      Access manager. The "class" attribute is the fully qualified name of a
      class implementing the AccessManager interface.
    -->
    <AccessManager class="org.apache.jackrabbit.core.security.SimpleAccessManager">
      <!-- <param name="config" value="${rep.home}/access.xml"/> -->
    </AccessManager>

    <LoginModule class="org.apache.jackrabbit.core.security.SimpleLoginModule">
      <!-- Name of the anonymous user ('anonymous' is the default value). -->
      <param name="anonymousId" value="anonymous"/>
      <!--
        Default user name to use in place of the anonymous user when no login
        credentials are provided (unset by default).
      -->
      <!-- <param name="defaultUserId" value="superuser"/> -->
    </LoginModule>
  </Security>

  <!-- Root directory of the workspaces and the name of the default workspace. -->
  <Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/>

  <!--
    Workspace configuration template. It is used to create the initial
    workspace if there is no workspace yet.
  -->
  <Workspace name="Jackrabbit Core">
    <!--
      Virtual file system of the workspace. The "class" attribute is the
      fully qualified name of a class implementing the FileSystem interface.
    -->
    <FileSystem class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
      <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
      <param name="schemaObjectPrefix" value="workspace_"/>
      <param name="shutdownOnClose" value="false"/>
    </FileSystem>

    <!--
      Persistence manager of the workspace. The "class" attribute is the
      fully qualified name of a class implementing the PersistenceManager
      interface.
    -->
    <PersistenceManager class="org.apache.jackrabbit.core.persistence.db.DerbyPersistenceManager">
      <param name="schemaObjectPrefix" value="Jackrabbit Core_"/>
      <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
      <param name="shutdownOnClose" value="false"/>
      <param name="externalBLOBs" value="true"/>
    </PersistenceManager>

    <!--
      Search index of the workspace and the file system it uses. The "class"
      attribute is the fully qualified name of a class implementing the
      QueryHandler interface.

      The line breaks and spacing inside the textFilterClasses value below
      are reproduced exactly as posted: whitespace inside an attribute value
      is part of the value, so it is not re-indented here.
    -->
    <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
      <param name="path" value="${wsp.home}/index"/>
      <param name="textFilterClasses"
             value="org.apache.jackrabbit.core.query.lucene.TextPlainTextFilter,
                       org.apache.jackrabbit.extractor.MsExcelTextExtractor,

org.apache.jackrabbit.extractor.MsPowerPointTextExtractor,
                       org.apache.jackrabbit.extractor.MsWordTextExtractor,
                       org.apache.jackrabbit.extractor.PdfTextExtractor,
                       org.apache.jackrabbit.extractor.HTMLTextExtractor,
                       org.apache.jackrabbit.extractor.XMLTextExtractor,
                       org.apache.jackrabbit.extractor.RTFTextExtractor,

org.apache.jackrabbit.extractor.OpenOfficeTextExtractor" />
      <FileSystem class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
        <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
        <param name="schemaObjectPrefix" value="search_index_w_"/>
        <param name="shutdownOnClose" value="false"/>
      </FileSystem>
    </SearchIndex>
  </Workspace>

  <!-- Versioning configuration. -->
  <Versioning rootPath="${rep.home}/version">
    <!--
      File system used for versioning by the persistence manager configured
      below.
    -->
    <FileSystem class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
      <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
      <param name="schemaObjectPrefix" value="filesystem_version_"/>
      <param name="shutdownOnClose" value="false"/>
    </FileSystem>

    <!--
      Persistence manager used for persisting version state. Note that the
      current versioning implementation is based on a 'normal' persistence
      manager, but this could change in future implementations.
    -->
    <PersistenceManager class="org.apache.jackrabbit.core.persistence.db.DerbyPersistenceManager">
      <param name="schemaObjectPrefix" value="version_"/>
      <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
      <param name="shutdownOnClose" value="false"/>
      <param name="externalBLOBs" value="true"/>
    </PersistenceManager>
  </Versioning>

  <!--
    Search index for content that is shared repository wide (the /jcr:system
    tree, which contains mainly versions).
  -->
  <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
    <param name="path" value="${rep.home}/repository/index"/>
    <FileSystem class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
      <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
      <param name="schemaObjectPrefix" value="search_index_"/>
      <param name="shutdownOnClose" value="false"/>
    </FileSystem>
  </SearchIndex>
</Repository>




-- 
Dan Diephouse
http://mulesource.com | http://netzooid.com/blog

Re: Removing blobs from filesystem with a DB PersistenceManager

Posted by Dan Diephouse <da...@mulesource.com>.
Umm, never mind, this was something blatantly stupid on my part. Jackrabbit
works as expected.

Dan

On Tue, Jun 22, 2010 at 4:36 PM, Dan Diephouse <dan.diephouse@mulesource.com> wrote:

> We are running 1.6.x and noticing that blobs are not getting deleted from
> the filesystem. We're just setting the data on the node like this:
>
> // these are required since we inherit from nt:file
> Node resNode = versionNode.addNode("jcr:content", "nt:resource");
> resNode.setProperty("jcr:mimeType", contentType);
> resNode.setProperty("jcr:lastModified", now);
> resNode.setProperty("jcr:data", inputStream);
>
> When the parent node is deleted though, the data blob is left on the
> filesystem. Config is attached below.
>
> Ok, now I've seen this:
> http://wiki.apache.org/jackrabbit/DataStore#Running_Data_Store_Garbage_Collection_.28Jackrabbit_1.x.29
>
> But I don't have a datastore configured, I have a PersistenceManager with
> external blobs. I understand that we should probably switch to datastore but
> that isn't an option. So two questions:
>
> 1) I can see the delete of the node happening in the Jackrabbit internals.
> Why isn't it also deleting the blob?!
> 2) Is there any way to go through the blobs directory and figure out which
> ones aren't referenced any more so we can clean things up for someone who
> has already deployed our app?
>
> Thanks,
> Dan
>
> <Repository>
>     <!--
>         virtual file system where the repository stores global state
>         (e.g. registered namespaces, custom node types, etc.)
>     -->
>     <FileSystem class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
>         <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
>         <param name="schemaObjectPrefix" value="repo_"/>
>     </FileSystem>
>
>     <!--
>         security configuration
>     -->
>     <Security appName="Jackrabbit">
>         <!--
>             access manager:
>             class: FQN of class implementing the AccessManager interface
>         -->
>         <AccessManager
> class="org.apache.jackrabbit.core.security.SimpleAccessManager">
>             <!-- <param name="config" value="${rep.home}/access.xml"/> -->
>         </AccessManager>
>
>         <LoginModule
> class="org.apache.jackrabbit.core.security.SimpleLoginModule">
>            <!-- anonymous user name ('anonymous' is the default value) -->
>            <param name="anonymousId" value="anonymous"/>
>            <!--
>               default user name to be used instead of the anonymous user
>               when no login credentials are provided (unset by default)
>            -->
>            <!-- <param name="defaultUserId" value="superuser"/> -->
>         </LoginModule>
>     </Security>
>
>     <!--
>         location of workspaces root directory and name of default workspace
>     -->
>     <Workspaces rootPath="${rep.home}/workspaces"
> defaultWorkspace="default"/>
>     <!--
>         workspace configuration template:
>         used to create the initial workspace if there's no workspace yet
>     -->
>     <Workspace name="Jackrabbit Core">
>         <!--
>             virtual file system of the workspace:
>             class: FQN of class implementing the FileSystem interface
>         -->
>         <FileSystem
> class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
>             <param name="url"
> value="jdbc:derby:${rep.home}/db;create=true"/>
>             <param name="schemaObjectPrefix" value="workspace_"/>
>             <param name="shutdownOnClose" value="false"/>
>         </FileSystem>
>         <!--
>             persistence manager of the workspace:
>             class: FQN of class implementing the PersistenceManager
> interface
>         -->
>         <PersistenceManager
> class="org.apache.jackrabbit.core.persistence.db.DerbyPersistenceManager">
>           <param name="schemaObjectPrefix" value="Jackrabbit Core_"/>
>           <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
>           <param name="shutdownOnClose" value="false"/>
>           <param name="externalBLOBs" value="true"/>
>         </PersistenceManager>
>         <!--
>             Search index and the file system it uses.
>             class: FQN of class implementing the QueryHandler interface
>         -->
>         <SearchIndex
> class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
>             <param name="path" value="${wsp.home}/index"/>
>             <param name="textFilterClasses"
>
>  value="org.apache.jackrabbit.core.query.lucene.TextPlainTextFilter,
>
> org.apache.jackrabbit.extractor.MsExcelTextExtractor,
>
> org.apache.jackrabbit.extractor.MsPowerPointTextExtractor,
>                        org.apache.jackrabbit.extractor.MsWordTextExtractor,
>                        org.apache.jackrabbit.extractor.PdfTextExtractor,
>                        org.apache.jackrabbit.extractor.HTMLTextExtractor,
>                        org.apache.jackrabbit.extractor.XMLTextExtractor,
>                        org.apache.jackrabbit.extractor.RTFTextExtractor,
>
> org.apache.jackrabbit.extractor.OpenOfficeTextExtractor" />
>             <FileSystem
> class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
>                 <param name="url"
> value="jdbc:derby:${rep.home}/db;create=true"/>
>                 <param name="schemaObjectPrefix" value="search_index_w_"/>
>                 <param name="shutdownOnClose" value="false"/>
>             </FileSystem>
>         </SearchIndex>
>     </Workspace>
>
>     <!--
>         Configures the versioning
>     -->
>     <Versioning rootPath="${rep.home}/version">
>         <!--
>             Configures the filesystem to use for versioning for the
> respective
>             persistence manager
>         -->
>         <FileSystem
> class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
>             <param name="url"
> value="jdbc:derby:${rep.home}/db;create=true"/>
>             <param name="schemaObjectPrefix" value="filesystem_version_"/>
>             <param name="shutdownOnClose" value="false"/>
>         </FileSystem>
>
>         <!--
>             Configures the persistence manager to be used for persisting
> version state.
>             Please note that the current versioning implementation is based
> on
>             a 'normal' persistence manager, but this could change in future
>             implementations.
>         -->
>         <PersistenceManager
> class="org.apache.jackrabbit.core.persistence.db.DerbyPersistenceManager">
>           <param name="schemaObjectPrefix" value="version_"/>
>           <param name="url" value="jdbc:derby:${rep.home}/db;create=true"/>
>           <param name="shutdownOnClose" value="false"/>
>           <param name="externalBLOBs" value="true"/>
>         </PersistenceManager>
>     </Versioning>
>
>     <!--
>         Search index for content that is shared repository wide
>         (/jcr:system tree, contains mainly versions)
>
>         -->
>     <SearchIndex
> class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
>         <param name="path" value="${rep.home}/repository/index"/>
>         <FileSystem
> class="org.apache.jackrabbit.core.fs.db.DerbyFileSystem">
>             <param name="url"
> value="jdbc:derby:${rep.home}/db;create=true"/>
>             <param name="schemaObjectPrefix" value="search_index_"/>
>             <param name="shutdownOnClose" value="false"/>
>         </FileSystem>
>     </SearchIndex>
> </Repository>
>
>
>
>
> --
> Dan Diephouse
> http://mulesource.com | http://netzooid.com/blog
>



-- 
Dan Diephouse
http://mulesource.com | http://netzooid.com/blog